1*46c4c49dSIbrahim Kanouche// Copyright 2017 Google Inc. 2*46c4c49dSIbrahim Kanouche// 3*46c4c49dSIbrahim Kanouche// Licensed under the Apache License, Version 2.0 (the "License"); 4*46c4c49dSIbrahim Kanouche// you may not use this file except in compliance with the License. 5*46c4c49dSIbrahim Kanouche// You may obtain a copy of the License at 6*46c4c49dSIbrahim Kanouche// 7*46c4c49dSIbrahim Kanouche// http://www.apache.org/licenses/LICENSE-2.0 8*46c4c49dSIbrahim Kanouche// 9*46c4c49dSIbrahim Kanouche// Unless required by applicable law or agreed to in writing, software 10*46c4c49dSIbrahim Kanouche// distributed under the License is distributed on an "AS IS" BASIS, 11*46c4c49dSIbrahim Kanouche// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12*46c4c49dSIbrahim Kanouche// See the License for the specific language governing permissions and 13*46c4c49dSIbrahim Kanouche// limitations under the License. 14*46c4c49dSIbrahim Kanouche 15*46c4c49dSIbrahim Kanouchepackage licenseclassifier 16*46c4c49dSIbrahim Kanouche 17*46c4c49dSIbrahim Kanoucheimport ( 18*46c4c49dSIbrahim Kanouche "bytes" 19*46c4c49dSIbrahim Kanouche "log" 20*46c4c49dSIbrahim Kanouche "os" 21*46c4c49dSIbrahim Kanouche "path/filepath" 22*46c4c49dSIbrahim Kanouche "strings" 23*46c4c49dSIbrahim Kanouche "testing" 24*46c4c49dSIbrahim Kanouche 25*46c4c49dSIbrahim Kanouche "github.com/google/licenseclassifier/stringclassifier" 26*46c4c49dSIbrahim Kanouche) 27*46c4c49dSIbrahim Kanouche 28*46c4c49dSIbrahim Kanouchevar ( 29*46c4c49dSIbrahim Kanouche agpl30, agpl30Header, apache20, bsd3, gpl20, ccbync20 string 30*46c4c49dSIbrahim Kanouche classifier *License 31*46c4c49dSIbrahim Kanouche) 32*46c4c49dSIbrahim Kanouche 33*46c4c49dSIbrahim Kanouchefunc TestMain(m *testing.M) { 34*46c4c49dSIbrahim Kanouche a30, err := ReadLicenseFile("AGPL-3.0.txt") 35*46c4c49dSIbrahim Kanouche if err != nil { 36*46c4c49dSIbrahim Kanouche log.Fatalf("error reading contents of AGPL-3.0.txt: %v", err) 37*46c4c49dSIbrahim Kanouche } 38*46c4c49dSIbrahim Kanouche a30h, err := ReadLicenseFile("AGPL-3.0.header.txt") 39*46c4c49dSIbrahim Kanouche if err != nil { 40*46c4c49dSIbrahim Kanouche log.Fatalf("error reading contents of AGPL-3.0.header.txt: %v", err) 41*46c4c49dSIbrahim Kanouche } 42*46c4c49dSIbrahim Kanouche a20, err := ReadLicenseFile("Apache-2.0.txt") 43*46c4c49dSIbrahim Kanouche if err != nil { 44*46c4c49dSIbrahim Kanouche log.Fatalf("error reading contents of Apache-2.0.txt: %v", err) 45*46c4c49dSIbrahim Kanouche } 46*46c4c49dSIbrahim Kanouche b3, err := ReadLicenseFile("BSD-3-Clause.txt") 47*46c4c49dSIbrahim Kanouche if err != nil { 48*46c4c49dSIbrahim Kanouche log.Fatalf("error reading contents of BSD-3-Clause.txt: %v", err) 49*46c4c49dSIbrahim Kanouche } 50*46c4c49dSIbrahim Kanouche g2, err := ReadLicenseFile("GPL-2.0.txt") 51*46c4c49dSIbrahim Kanouche if err != nil { 52*46c4c49dSIbrahim Kanouche log.Fatalf("error reading contents of GPL-2.0.txt: %v", err) 53*46c4c49dSIbrahim Kanouche } 54*46c4c49dSIbrahim Kanouche cc20, err := ReadLicenseFile("CC-BY-NC-2.0.txt") 55*46c4c49dSIbrahim Kanouche if err != nil { 56*46c4c49dSIbrahim Kanouche log.Fatalf("error reading contents of CC-BY-NC-2.0.txt: %v", err) 57*46c4c49dSIbrahim Kanouche } 58*46c4c49dSIbrahim Kanouche 59*46c4c49dSIbrahim Kanouche agpl30 = TrimExtraneousTrailingText(string(a30)) 60*46c4c49dSIbrahim Kanouche agpl30Header = TrimExtraneousTrailingText(string(a30h)) 61*46c4c49dSIbrahim Kanouche apache20 = TrimExtraneousTrailingText(string(a20)) 62*46c4c49dSIbrahim Kanouche bsd3 = TrimExtraneousTrailingText(string(b3)) 63*46c4c49dSIbrahim Kanouche gpl20 = TrimExtraneousTrailingText(string(g2)) 64*46c4c49dSIbrahim Kanouche ccbync20 = TrimExtraneousTrailingText(string(cc20)) 65*46c4c49dSIbrahim Kanouche 66*46c4c49dSIbrahim Kanouche classifier, err = New(DefaultConfidenceThreshold) 67*46c4c49dSIbrahim Kanouche if err != nil { 68*46c4c49dSIbrahim Kanouche log.Fatalf("cannot create license classifier: %v", err) 69*46c4c49dSIbrahim Kanouche } 70*46c4c49dSIbrahim Kanouche os.Exit(m.Run()) 71*46c4c49dSIbrahim Kanouche} 72*46c4c49dSIbrahim Kanouche 73*46c4c49dSIbrahim Kanouchefunc TestClassifier_NearestMatch(t *testing.T) { 74*46c4c49dSIbrahim Kanouche tests := []struct { 75*46c4c49dSIbrahim Kanouche description string 76*46c4c49dSIbrahim Kanouche filename string 77*46c4c49dSIbrahim Kanouche extraText string 78*46c4c49dSIbrahim Kanouche wantLicense string 79*46c4c49dSIbrahim Kanouche wantConfidence float64 80*46c4c49dSIbrahim Kanouche }{ 81*46c4c49dSIbrahim Kanouche { 82*46c4c49dSIbrahim Kanouche description: "AGPL 3.0 license", 83*46c4c49dSIbrahim Kanouche filename: "AGPL-3.0.txt", 84*46c4c49dSIbrahim Kanouche wantLicense: "AGPL-3.0", 85*46c4c49dSIbrahim Kanouche wantConfidence: 1.0, 86*46c4c49dSIbrahim Kanouche }, 87*46c4c49dSIbrahim Kanouche { 88*46c4c49dSIbrahim Kanouche description: "Apache 2.0 license", 89*46c4c49dSIbrahim Kanouche filename: "Apache-2.0.txt", 90*46c4c49dSIbrahim Kanouche wantLicense: "Apache-2.0", 91*46c4c49dSIbrahim Kanouche wantConfidence: 1.0, 92*46c4c49dSIbrahim Kanouche }, 93*46c4c49dSIbrahim Kanouche { 94*46c4c49dSIbrahim Kanouche description: "GPL 2.0 license", 95*46c4c49dSIbrahim Kanouche filename: "GPL-2.0.txt", 96*46c4c49dSIbrahim Kanouche wantLicense: "GPL-2.0", 97*46c4c49dSIbrahim Kanouche wantConfidence: 1.0, 98*46c4c49dSIbrahim Kanouche }, 99*46c4c49dSIbrahim Kanouche { 100*46c4c49dSIbrahim Kanouche description: "BSD 3 Clause license with extra text", 101*46c4c49dSIbrahim Kanouche filename: "BSD-3-Clause.txt", 102*46c4c49dSIbrahim Kanouche extraText: "New BSD License\nCopyright © 1998 Yoyodyne, Inc.\n", 103*46c4c49dSIbrahim Kanouche wantLicense: "BSD-3-Clause", 104*46c4c49dSIbrahim Kanouche wantConfidence: 0.94, 105*46c4c49dSIbrahim Kanouche }, 106*46c4c49dSIbrahim Kanouche } 107*46c4c49dSIbrahim Kanouche 108*46c4c49dSIbrahim Kanouche classifier.Threshold = DefaultConfidenceThreshold 109*46c4c49dSIbrahim Kanouche for _, tt := range tests { 110*46c4c49dSIbrahim Kanouche content, err := ReadLicenseFile(tt.filename) 111*46c4c49dSIbrahim Kanouche if err != nil { 112*46c4c49dSIbrahim Kanouche t.Errorf("error reading contents of %q license: %v", tt.wantLicense, err) 113*46c4c49dSIbrahim Kanouche continue 114*46c4c49dSIbrahim Kanouche } 115*46c4c49dSIbrahim Kanouche 116*46c4c49dSIbrahim Kanouche m := classifier.NearestMatch(tt.extraText + TrimExtraneousTrailingText(string(content))) 117*46c4c49dSIbrahim Kanouche if got, want := m.Name, tt.wantLicense; got != want { 118*46c4c49dSIbrahim Kanouche t.Errorf("NearestMatch(%q) = %q, want %q", tt.description, got, want) 119*46c4c49dSIbrahim Kanouche } 120*46c4c49dSIbrahim Kanouche if got, want := m.Confidence, tt.wantConfidence; got < want { 121*46c4c49dSIbrahim Kanouche t.Errorf("NearestMatch(%q) = %v, want %v", tt.description, got, want) 122*46c4c49dSIbrahim Kanouche } 123*46c4c49dSIbrahim Kanouche } 124*46c4c49dSIbrahim Kanouche} 125*46c4c49dSIbrahim Kanouche 126*46c4c49dSIbrahim Kanouchefunc TestClassifier_MultipleMatch(t *testing.T) { 127*46c4c49dSIbrahim Kanouche tests := []struct { 128*46c4c49dSIbrahim Kanouche description string 129*46c4c49dSIbrahim Kanouche text string 130*46c4c49dSIbrahim Kanouche want stringclassifier.Matches 131*46c4c49dSIbrahim Kanouche }{ 132*46c4c49dSIbrahim Kanouche { 133*46c4c49dSIbrahim Kanouche description: "Two licenses", 134*46c4c49dSIbrahim Kanouche text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + apache20 + strings.Repeat("-", 80) + "\n" + bsd3, 135*46c4c49dSIbrahim Kanouche want: stringclassifier.Matches{ 136*46c4c49dSIbrahim Kanouche { 137*46c4c49dSIbrahim Kanouche Name: "Apache-2.0", 138*46c4c49dSIbrahim Kanouche Confidence: 1.0, 139*46c4c49dSIbrahim Kanouche }, 140*46c4c49dSIbrahim Kanouche { 141*46c4c49dSIbrahim Kanouche Name: "BSD-3-Clause", 142*46c4c49dSIbrahim Kanouche Confidence: 1.0, 143*46c4c49dSIbrahim Kanouche }, 144*46c4c49dSIbrahim Kanouche }, 145*46c4c49dSIbrahim Kanouche }, 146*46c4c49dSIbrahim Kanouche { 147*46c4c49dSIbrahim Kanouche description: "Two licenses: partial match", 148*46c4c49dSIbrahim Kanouche text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + 149*46c4c49dSIbrahim Kanouche string(apache20[:len(apache20)/2-1]) + string(apache20[len(apache20)/2+7:]) + strings.Repeat("-", 80) + "\n" + 150*46c4c49dSIbrahim Kanouche string(bsd3[:len(bsd3)/2]) + "intervening stuff" + string(bsd3[len(bsd3)/2:]), 151*46c4c49dSIbrahim Kanouche want: stringclassifier.Matches{ 152*46c4c49dSIbrahim Kanouche { 153*46c4c49dSIbrahim Kanouche Name: "Apache-2.0", 154*46c4c49dSIbrahim Kanouche Confidence: 0.99, 155*46c4c49dSIbrahim Kanouche }, 156*46c4c49dSIbrahim Kanouche { 157*46c4c49dSIbrahim Kanouche Name: "BSD-3-Clause", 158*46c4c49dSIbrahim Kanouche Confidence: 0.98, 159*46c4c49dSIbrahim Kanouche }, 160*46c4c49dSIbrahim Kanouche }, 161*46c4c49dSIbrahim Kanouche }, 162*46c4c49dSIbrahim Kanouche { 163*46c4c49dSIbrahim Kanouche description: "Two licenses: one forbidden the other okay", 164*46c4c49dSIbrahim Kanouche text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + apache20 + strings.Repeat("-", 80) + "\n" + ccbync20, 165*46c4c49dSIbrahim Kanouche want: stringclassifier.Matches{ 166*46c4c49dSIbrahim Kanouche { 167*46c4c49dSIbrahim Kanouche Name: "Apache-2.0", 168*46c4c49dSIbrahim Kanouche Confidence: 0.99, 169*46c4c49dSIbrahim Kanouche }, 170*46c4c49dSIbrahim Kanouche { 171*46c4c49dSIbrahim Kanouche Name: "CC-BY-NC-2.0", 172*46c4c49dSIbrahim Kanouche Confidence: 1.0, 173*46c4c49dSIbrahim Kanouche }, 174*46c4c49dSIbrahim Kanouche }, 175*46c4c49dSIbrahim Kanouche }, 176*46c4c49dSIbrahim Kanouche { 177*46c4c49dSIbrahim Kanouche description: "Two licenses without any space between them.", 178*46c4c49dSIbrahim Kanouche text: apache20 + "." + bsd3, 179*46c4c49dSIbrahim Kanouche want: stringclassifier.Matches{ 180*46c4c49dSIbrahim Kanouche { 181*46c4c49dSIbrahim Kanouche Name: "Apache-2.0", 182*46c4c49dSIbrahim Kanouche Confidence: 1.0, 183*46c4c49dSIbrahim Kanouche }, 184*46c4c49dSIbrahim Kanouche { 185*46c4c49dSIbrahim Kanouche Name: "BSD-3-Clause", 186*46c4c49dSIbrahim Kanouche Confidence: 1.0, 187*46c4c49dSIbrahim Kanouche }, 188*46c4c49dSIbrahim Kanouche }, 189*46c4c49dSIbrahim Kanouche }, 190*46c4c49dSIbrahim Kanouche } 191*46c4c49dSIbrahim Kanouche 192*46c4c49dSIbrahim Kanouche classifier.Threshold = 0.95 193*46c4c49dSIbrahim Kanouche defer func() { 194*46c4c49dSIbrahim Kanouche classifier.Threshold = DefaultConfidenceThreshold 195*46c4c49dSIbrahim Kanouche }() 196*46c4c49dSIbrahim Kanouche for _, tt := range tests { 197*46c4c49dSIbrahim Kanouche m := classifier.MultipleMatch(tt.text, false) 198*46c4c49dSIbrahim Kanouche if len(m) != len(tt.want) { 199*46c4c49dSIbrahim Kanouche t.Fatalf("MultipleMatch(%q) number matches: %v, want %v", tt.description, len(m), len(tt.want)) 200*46c4c49dSIbrahim Kanouche continue 201*46c4c49dSIbrahim Kanouche } 202*46c4c49dSIbrahim Kanouche 203*46c4c49dSIbrahim Kanouche for i := 0; i < len(m); i++ { 204*46c4c49dSIbrahim Kanouche w := tt.want[i] 205*46c4c49dSIbrahim Kanouche if got, want := m[i].Name, w.Name; got != want { 206*46c4c49dSIbrahim Kanouche t.Errorf("MultipleMatch(%q) = %q, want %q", tt.description, got, want) 207*46c4c49dSIbrahim Kanouche } 208*46c4c49dSIbrahim Kanouche if got, want := m[i].Confidence, w.Confidence; got < want { 209*46c4c49dSIbrahim Kanouche t.Errorf("MultipleMatch(%q) = %v, want %v", tt.description, got, want) 210*46c4c49dSIbrahim Kanouche } 211*46c4c49dSIbrahim Kanouche } 212*46c4c49dSIbrahim Kanouche } 213*46c4c49dSIbrahim Kanouche} 214*46c4c49dSIbrahim Kanouche 215*46c4c49dSIbrahim Kanouchefunc TestClassifier_MultipleMatch_Headers(t *testing.T) { 216*46c4c49dSIbrahim Kanouche tests := []struct { 217*46c4c49dSIbrahim Kanouche description string 218*46c4c49dSIbrahim Kanouche text string 219*46c4c49dSIbrahim Kanouche want stringclassifier.Matches 220*46c4c49dSIbrahim Kanouche }{ 221*46c4c49dSIbrahim Kanouche { 222*46c4c49dSIbrahim Kanouche description: "AGPL-3.0 header", 223*46c4c49dSIbrahim Kanouche text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + agpl30Header, 224*46c4c49dSIbrahim Kanouche want: stringclassifier.Matches{ 225*46c4c49dSIbrahim Kanouche { 226*46c4c49dSIbrahim Kanouche Name: "AGPL-3.0", 227*46c4c49dSIbrahim Kanouche Confidence: 1.0, 228*46c4c49dSIbrahim Kanouche Offset: 0, 229*46c4c49dSIbrahim Kanouche }, 230*46c4c49dSIbrahim Kanouche }, 231*46c4c49dSIbrahim Kanouche }, 232*46c4c49dSIbrahim Kanouche { 233*46c4c49dSIbrahim Kanouche description: "Modified LGPL-2.1 header", 234*46c4c49dSIbrahim Kanouche text: `Common Widget code. 235*46c4c49dSIbrahim Kanouche 236*46c4c49dSIbrahim KanoucheCopyright (C) 2013-2015 Yoyodyne, Inc. 237*46c4c49dSIbrahim Kanouche 238*46c4c49dSIbrahim KanoucheThis library is free software; you can redistribute it and/or 239*46c4c49dSIbrahim Kanouchemodify it under the terms of the GNU Lesser General Public 240*46c4c49dSIbrahim KanoucheLicense as published by the Free Software Foundation; either 241*46c4c49dSIbrahim Kanoucheversion 2.1 of the License, or (at your option) any later version (but not!). 242*46c4c49dSIbrahim Kanouche 243*46c4c49dSIbrahim KanoucheThis library is distributed in the hope that it will be useful, 244*46c4c49dSIbrahim Kanouchebut WITHOUT ANY WARRANTY; without even the implied warranty of 245*46c4c49dSIbrahim KanoucheMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 246*46c4c49dSIbrahim KanoucheLesser General Public License for more details. 247*46c4c49dSIbrahim Kanouche 248*46c4c49dSIbrahim KanoucheYou should have received a copy of the GNU Lesser General Public 249*46c4c49dSIbrahim KanoucheLicense along with this library; if not, write to the Free Software 250*46c4c49dSIbrahim KanoucheFoundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 251*46c4c49dSIbrahim Kanouche`, 252*46c4c49dSIbrahim Kanouche want: stringclassifier.Matches{ 253*46c4c49dSIbrahim Kanouche { 254*46c4c49dSIbrahim Kanouche Name: "LGPL-2.1", 255*46c4c49dSIbrahim Kanouche Confidence: 0.97, 256*46c4c49dSIbrahim Kanouche Offset: 197, 257*46c4c49dSIbrahim Kanouche }, 258*46c4c49dSIbrahim Kanouche }, 259*46c4c49dSIbrahim Kanouche }, 260*46c4c49dSIbrahim Kanouche } 261*46c4c49dSIbrahim Kanouche 262*46c4c49dSIbrahim Kanouche classifier.Threshold = 0.90 263*46c4c49dSIbrahim Kanouche defer func() { 264*46c4c49dSIbrahim Kanouche classifier.Threshold = DefaultConfidenceThreshold 265*46c4c49dSIbrahim Kanouche }() 266*46c4c49dSIbrahim Kanouche for _, tt := range tests { 267*46c4c49dSIbrahim Kanouche m := classifier.MultipleMatch(tt.text, true) 268*46c4c49dSIbrahim Kanouche if len(m) != len(tt.want) { 269*46c4c49dSIbrahim Kanouche t.Errorf("MultipleMatch(%q) number matches: %v, want %v", tt.description, len(m), len(tt.want)) 270*46c4c49dSIbrahim Kanouche continue 271*46c4c49dSIbrahim Kanouche } 272*46c4c49dSIbrahim Kanouche 273*46c4c49dSIbrahim Kanouche for i := 0; i < len(m); i++ { 274*46c4c49dSIbrahim Kanouche w := tt.want[i] 275*46c4c49dSIbrahim Kanouche if got, want := m[i].Name, w.Name; got != want { 276*46c4c49dSIbrahim Kanouche t.Errorf("MultipleMatch(%q) = %q, want %q", tt.description, got, want) 277*46c4c49dSIbrahim Kanouche } 278*46c4c49dSIbrahim Kanouche if got, want := m[i].Confidence, w.Confidence; got < want { 279*46c4c49dSIbrahim Kanouche t.Errorf("MultipleMatch(%q) = %v, want %v", tt.description, got, want) 280*46c4c49dSIbrahim Kanouche } 281*46c4c49dSIbrahim Kanouche } 282*46c4c49dSIbrahim Kanouche } 283*46c4c49dSIbrahim Kanouche} 284*46c4c49dSIbrahim Kanouche 285*46c4c49dSIbrahim Kanouchefunc TestClassifier_CopyrightHolder(t *testing.T) { 286*46c4c49dSIbrahim Kanouche tests := []struct { 287*46c4c49dSIbrahim Kanouche copyright string 288*46c4c49dSIbrahim Kanouche want string 289*46c4c49dSIbrahim Kanouche }{ 290*46c4c49dSIbrahim Kanouche { 291*46c4c49dSIbrahim Kanouche copyright: "Copyright 2008 Yoyodyne Inc. All Rights Reserved.", 292*46c4c49dSIbrahim Kanouche want: "Yoyodyne Inc.", 293*46c4c49dSIbrahim Kanouche }, 294*46c4c49dSIbrahim Kanouche { 295*46c4c49dSIbrahim Kanouche copyright: "Copyright 2010-2016 Yoyodyne, Inc.", 296*46c4c49dSIbrahim Kanouche want: "Yoyodyne, Inc.", 297*46c4c49dSIbrahim Kanouche }, 298*46c4c49dSIbrahim Kanouche { 299*46c4c49dSIbrahim Kanouche copyright: "Copyright 2010, 2011, 2012 Yoyodyne, Inc., All rights reserved.", 300*46c4c49dSIbrahim Kanouche want: "Yoyodyne, Inc.", 301*46c4c49dSIbrahim Kanouche }, 302*46c4c49dSIbrahim Kanouche { 303*46c4c49dSIbrahim Kanouche copyright: "Copyright (c) 2015 Yoyodyne, Inc. All rights reserved.", 304*46c4c49dSIbrahim Kanouche want: "Yoyodyne, Inc.", 305*46c4c49dSIbrahim Kanouche }, 306*46c4c49dSIbrahim Kanouche { 307*46c4c49dSIbrahim Kanouche copyright: "Copyright © 1998 by Yoyodyne, Inc., San Narciso, CA, US.", 308*46c4c49dSIbrahim Kanouche want: "Yoyodyne, Inc., San Narciso, CA, US", 309*46c4c49dSIbrahim Kanouche }, 310*46c4c49dSIbrahim Kanouche { 311*46c4c49dSIbrahim Kanouche copyright: "Copyright (c) 2015 The Algonquin Round Table. All rights reserved.", 312*46c4c49dSIbrahim Kanouche want: "The Algonquin Round Table", 313*46c4c49dSIbrahim Kanouche }, 314*46c4c49dSIbrahim Kanouche { 315*46c4c49dSIbrahim Kanouche copyright: "Copyright 2016, The Android Open Source Project", 316*46c4c49dSIbrahim Kanouche want: "The Android Open Source Project", 317*46c4c49dSIbrahim Kanouche }, 318*46c4c49dSIbrahim Kanouche { 319*46c4c49dSIbrahim Kanouche copyright: `--------------------------------------------------------- 320*46c4c49dSIbrahim Kanouchefoo.c: 321*46c4c49dSIbrahim KanoucheCopyright 2016, The Android Open Source Project 322*46c4c49dSIbrahim Kanouche`, 323*46c4c49dSIbrahim Kanouche want: "The Android Open Source Project", 324*46c4c49dSIbrahim Kanouche }, 325*46c4c49dSIbrahim Kanouche } 326*46c4c49dSIbrahim Kanouche 327*46c4c49dSIbrahim Kanouche for _, tt := range tests { 328*46c4c49dSIbrahim Kanouche got := CopyrightHolder(tt.copyright) 329*46c4c49dSIbrahim Kanouche if got != tt.want { 330*46c4c49dSIbrahim Kanouche t.Errorf("CopyrightHolder(%q) = %q, want %q", tt.copyright, got, tt.want) 331*46c4c49dSIbrahim Kanouche } 332*46c4c49dSIbrahim Kanouche } 333*46c4c49dSIbrahim Kanouche} 334*46c4c49dSIbrahim Kanouche 335*46c4c49dSIbrahim Kanouchefunc TestClassifier_WithinConfidenceThreshold(t *testing.T) { 336*46c4c49dSIbrahim Kanouche tests := []struct { 337*46c4c49dSIbrahim Kanouche description string 338*46c4c49dSIbrahim Kanouche text string 339*46c4c49dSIbrahim Kanouche confDef bool 340*46c4c49dSIbrahim Kanouche conf99 bool 341*46c4c49dSIbrahim Kanouche conf93 bool 342*46c4c49dSIbrahim Kanouche conf5 bool 343*46c4c49dSIbrahim Kanouche }{ 344*46c4c49dSIbrahim Kanouche { 345*46c4c49dSIbrahim Kanouche description: "Apache 2.0", 346*46c4c49dSIbrahim Kanouche text: apache20, 347*46c4c49dSIbrahim Kanouche confDef: true, 348*46c4c49dSIbrahim Kanouche conf99: true, 349*46c4c49dSIbrahim Kanouche conf93: true, 350*46c4c49dSIbrahim Kanouche conf5: true, 351*46c4c49dSIbrahim Kanouche }, 352*46c4c49dSIbrahim Kanouche { 353*46c4c49dSIbrahim Kanouche description: "GPL 2.0", 354*46c4c49dSIbrahim Kanouche text: gpl20, 355*46c4c49dSIbrahim Kanouche confDef: true, 356*46c4c49dSIbrahim Kanouche conf99: true, 357*46c4c49dSIbrahim Kanouche conf93: true, 358*46c4c49dSIbrahim Kanouche conf5: true, 359*46c4c49dSIbrahim Kanouche }, 360*46c4c49dSIbrahim Kanouche { 361*46c4c49dSIbrahim Kanouche description: "BSD 3 Clause license with extra text", 362*46c4c49dSIbrahim Kanouche text: "New BSD License\nCopyright © 1998 Yoyodyne, Inc.\n" + bsd3, 363*46c4c49dSIbrahim Kanouche confDef: true, 364*46c4c49dSIbrahim Kanouche conf99: true, 365*46c4c49dSIbrahim Kanouche conf93: true, 366*46c4c49dSIbrahim Kanouche conf5: true, 367*46c4c49dSIbrahim Kanouche }, 368*46c4c49dSIbrahim Kanouche { 369*46c4c49dSIbrahim Kanouche description: "Very low confidence", 370*46c4c49dSIbrahim Kanouche text: strings.Repeat("Random text is random, but not a license\n", 40), 371*46c4c49dSIbrahim Kanouche confDef: false, 372*46c4c49dSIbrahim Kanouche conf99: false, 373*46c4c49dSIbrahim Kanouche conf93: false, 374*46c4c49dSIbrahim Kanouche conf5: true, 375*46c4c49dSIbrahim Kanouche }, 376*46c4c49dSIbrahim Kanouche } 377*46c4c49dSIbrahim Kanouche 378*46c4c49dSIbrahim Kanouche defer func() { 379*46c4c49dSIbrahim Kanouche classifier.Threshold = DefaultConfidenceThreshold 380*46c4c49dSIbrahim Kanouche }() 381*46c4c49dSIbrahim Kanouche for _, tt := range tests { 382*46c4c49dSIbrahim Kanouche t.Run(tt.description, func(t *testing.T) { 383*46c4c49dSIbrahim Kanouche classifier.Threshold = DefaultConfidenceThreshold 384*46c4c49dSIbrahim Kanouche m := classifier.NearestMatch(tt.text) 385*46c4c49dSIbrahim Kanouche if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.confDef { 386*46c4c49dSIbrahim Kanouche t.Errorf("WithinConfidenceThreshold() at %v returned wrong result; got %v, want %v", classifier.Threshold, got, tt.confDef) 387*46c4c49dSIbrahim Kanouche } 388*46c4c49dSIbrahim Kanouche 389*46c4c49dSIbrahim Kanouche classifier.Threshold = 0.99 390*46c4c49dSIbrahim Kanouche m = classifier.NearestMatch(tt.text) 391*46c4c49dSIbrahim Kanouche if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.conf99 { 392*46c4c49dSIbrahim Kanouche t.Errorf("WithinConfidenceThreshold(%q) = %v, want %v", tt.description, got, tt.conf99) 393*46c4c49dSIbrahim Kanouche } 394*46c4c49dSIbrahim Kanouche 395*46c4c49dSIbrahim Kanouche classifier.Threshold = 0.93 396*46c4c49dSIbrahim Kanouche m = classifier.NearestMatch(tt.text) 397*46c4c49dSIbrahim Kanouche if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.conf93 { 398*46c4c49dSIbrahim Kanouche t.Errorf("WithinConfidenceThreshold(%q) = %v, want %v", tt.description, got, tt.conf93) 399*46c4c49dSIbrahim Kanouche } 400*46c4c49dSIbrahim Kanouche 401*46c4c49dSIbrahim Kanouche classifier.Threshold = 0.05 402*46c4c49dSIbrahim Kanouche m = classifier.NearestMatch(tt.text) 403*46c4c49dSIbrahim Kanouche if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.conf5 { 404*46c4c49dSIbrahim Kanouche t.Errorf("WithinConfidenceThreshold(%q) = %v, want %v", tt.description, got, tt.conf5) 405*46c4c49dSIbrahim Kanouche } 406*46c4c49dSIbrahim Kanouche }) 407*46c4c49dSIbrahim Kanouche } 408*46c4c49dSIbrahim Kanouche} 409*46c4c49dSIbrahim Kanouche 410*46c4c49dSIbrahim Kanouchefunc TestRemoveIgnorableText(t *testing.T) { 411*46c4c49dSIbrahim Kanouche const want = `Lorem ipsum dolor sit amet, pellentesque wisi tortor duis, amet adipiscing bibendum elit aliquam 412*46c4c49dSIbrahim Kanoucheleo. Mattis commodo sed accumsan at in. 413*46c4c49dSIbrahim Kanouche` 414*46c4c49dSIbrahim Kanouche 415*46c4c49dSIbrahim Kanouche tests := []struct { 416*46c4c49dSIbrahim Kanouche original string 417*46c4c49dSIbrahim Kanouche want string 418*46c4c49dSIbrahim Kanouche }{ 419*46c4c49dSIbrahim Kanouche {"MIT License\n", "\n"}, 420*46c4c49dSIbrahim Kanouche {"The MIT License\n", "\n"}, 421*46c4c49dSIbrahim Kanouche {"The MIT License (MIT)\n", "\n"}, 422*46c4c49dSIbrahim Kanouche {"BSD License\n", "\n"}, 423*46c4c49dSIbrahim Kanouche {"New BSD License\n", "\n"}, 424*46c4c49dSIbrahim Kanouche {"COPYRIGHT AND PERMISSION NOTICE\n", "\n"}, 425*46c4c49dSIbrahim Kanouche {"Copyright (c) 2016, Yoyodyne, Inc.\n", "\n"}, 426*46c4c49dSIbrahim Kanouche {"All rights reserved.\n", "\n"}, 427*46c4c49dSIbrahim Kanouche {"Some rights reserved.\n", "\n"}, 428*46c4c49dSIbrahim Kanouche {"@license\n", "\n"}, 429*46c4c49dSIbrahim Kanouche 430*46c4c49dSIbrahim Kanouche // Now with wanted texts. 431*46c4c49dSIbrahim Kanouche { 432*46c4c49dSIbrahim Kanouche original: `The MIT License 433*46c4c49dSIbrahim Kanouche 434*46c4c49dSIbrahim KanoucheCopyright (c) 2016, Yoyodyne, Inc. 435*46c4c49dSIbrahim KanoucheAll rights reserved. 436*46c4c49dSIbrahim Kanouche` + want, 437*46c4c49dSIbrahim Kanouche want: strings.ToLower(want), 438*46c4c49dSIbrahim Kanouche }, 439*46c4c49dSIbrahim Kanouche } 440*46c4c49dSIbrahim Kanouche 441*46c4c49dSIbrahim Kanouche for _, tt := range tests { 442*46c4c49dSIbrahim Kanouche if got := removeIgnorableTexts(strings.ToLower(tt.original)); got != tt.want { 443*46c4c49dSIbrahim Kanouche t.Errorf("Mismatch(%q) =>\n%s\nwant:\n%s", tt.original, got, tt.want) 444*46c4c49dSIbrahim Kanouche } 445*46c4c49dSIbrahim Kanouche } 446*46c4c49dSIbrahim Kanouche} 447*46c4c49dSIbrahim Kanouche 448*46c4c49dSIbrahim Kanouchefunc TestRemoveShebangLine(t *testing.T) { 449*46c4c49dSIbrahim Kanouche tests := []struct { 450*46c4c49dSIbrahim Kanouche original string 451*46c4c49dSIbrahim Kanouche want string 452*46c4c49dSIbrahim Kanouche }{ 453*46c4c49dSIbrahim Kanouche { 454*46c4c49dSIbrahim Kanouche original: "", 455*46c4c49dSIbrahim Kanouche want: "", 456*46c4c49dSIbrahim Kanouche }, 457*46c4c49dSIbrahim Kanouche { 458*46c4c49dSIbrahim Kanouche original: "#!/usr/bin/env python -C", 459*46c4c49dSIbrahim Kanouche want: "#!/usr/bin/env python -C", 460*46c4c49dSIbrahim Kanouche }, 461*46c4c49dSIbrahim Kanouche { 462*46c4c49dSIbrahim Kanouche original: `#!/usr/bin/env python -C 463*46c4c49dSIbrahim Kanouche# First line of license text. 464*46c4c49dSIbrahim Kanouche# Second line of license text. 465*46c4c49dSIbrahim Kanouche`, 466*46c4c49dSIbrahim Kanouche want: `# First line of license text. 467*46c4c49dSIbrahim Kanouche# Second line of license text. 468*46c4c49dSIbrahim Kanouche`, 469*46c4c49dSIbrahim Kanouche }, 470*46c4c49dSIbrahim Kanouche { 471*46c4c49dSIbrahim Kanouche original: `# First line of license text. 472*46c4c49dSIbrahim Kanouche# Second line of license text. 473*46c4c49dSIbrahim Kanouche`, 474*46c4c49dSIbrahim Kanouche want: `# First line of license text. 475*46c4c49dSIbrahim Kanouche# Second line of license text. 476*46c4c49dSIbrahim Kanouche`, 477*46c4c49dSIbrahim Kanouche }, 478*46c4c49dSIbrahim Kanouche } 479*46c4c49dSIbrahim Kanouche 480*46c4c49dSIbrahim Kanouche for _, tt := range tests { 481*46c4c49dSIbrahim Kanouche got := removeShebangLine(tt.original) 482*46c4c49dSIbrahim Kanouche if got != tt.want { 483*46c4c49dSIbrahim Kanouche t.Errorf("RemoveShebangLine(%q) =>\n%s\nwant:\n%s", tt.original, got, tt.want) 484*46c4c49dSIbrahim Kanouche } 485*46c4c49dSIbrahim Kanouche } 486*46c4c49dSIbrahim Kanouche} 487*46c4c49dSIbrahim Kanouche 488*46c4c49dSIbrahim Kanouchefunc TestRemoveNonWords(t *testing.T) { 489*46c4c49dSIbrahim Kanouche tests := []struct { 490*46c4c49dSIbrahim Kanouche original string 491*46c4c49dSIbrahim Kanouche want string 492*46c4c49dSIbrahim Kanouche }{ 493*46c4c49dSIbrahim Kanouche { 494*46c4c49dSIbrahim Kanouche original: `# # Hello 495*46c4c49dSIbrahim Kanouche## World 496*46c4c49dSIbrahim Kanouche`, 497*46c4c49dSIbrahim Kanouche want: ` Hello World `, 498*46c4c49dSIbrahim Kanouche }, 499*46c4c49dSIbrahim Kanouche { 500*46c4c49dSIbrahim Kanouche original: ` * This text has a bulleted list: 501*46c4c49dSIbrahim Kanouche * * item 1 502*46c4c49dSIbrahim Kanouche * * item 2`, 503*46c4c49dSIbrahim Kanouche want: ` This text has a bulleted list item 1 item 2`, 504*46c4c49dSIbrahim Kanouche }, 505*46c4c49dSIbrahim Kanouche { 506*46c4c49dSIbrahim Kanouche original: ` 507*46c4c49dSIbrahim Kanouche 508*46c4c49dSIbrahim Kanouche * This text has a bulleted list: 509*46c4c49dSIbrahim Kanouche * * item 1 510*46c4c49dSIbrahim Kanouche * * item 2`, 511*46c4c49dSIbrahim Kanouche want: ` This text has a bulleted list item 1 item 2`, 512*46c4c49dSIbrahim Kanouche }, 513*46c4c49dSIbrahim Kanouche { 514*46c4c49dSIbrahim Kanouche original: `// This text has a bulleted list: 515*46c4c49dSIbrahim Kanouche// 1. item 1 516*46c4c49dSIbrahim Kanouche// 2. item 2`, 517*46c4c49dSIbrahim Kanouche want: ` This text has a bulleted list 1 item 1 2 item 2`, 518*46c4c49dSIbrahim Kanouche }, 519*46c4c49dSIbrahim Kanouche { 520*46c4c49dSIbrahim Kanouche original: `// «Copyright (c) 1998 Yoyodyne, Inc.» 521*46c4c49dSIbrahim Kanouche// This text has a bulleted list: 522*46c4c49dSIbrahim Kanouche// 1. item 1 523*46c4c49dSIbrahim Kanouche// 2. item 2 524*46c4c49dSIbrahim Kanouche`, 525*46c4c49dSIbrahim Kanouche want: ` «Copyright c 1998 Yoyodyne Inc » This text has a bulleted list 1 item 1 2 item 2 `, 526*46c4c49dSIbrahim Kanouche }, 527*46c4c49dSIbrahim Kanouche { 528*46c4c49dSIbrahim Kanouche original: `* 529*46c4c49dSIbrahim Kanouche * This is the first line we want. 530*46c4c49dSIbrahim Kanouche * This is the second line we want. 531*46c4c49dSIbrahim Kanouche * This is the third line we want. 532*46c4c49dSIbrahim Kanouche * This is the last line we want. 533*46c4c49dSIbrahim Kanouche`, 534*46c4c49dSIbrahim Kanouche want: ` This is the first line we want This is the second line we want This is the third line we want This is the last line we want `, 535*46c4c49dSIbrahim Kanouche }, 536*46c4c49dSIbrahim Kanouche { 537*46c4c49dSIbrahim Kanouche original: `===---------------------------------------------=== 538*46c4c49dSIbrahim Kanouche*** 539*46c4c49dSIbrahim Kanouche* This is the first line we want. 540*46c4c49dSIbrahim Kanouche* This is the second line we want. 541*46c4c49dSIbrahim Kanouche* This is the third line we want. 542*46c4c49dSIbrahim Kanouche* This is the last line we want. 543*46c4c49dSIbrahim Kanouche*** 544*46c4c49dSIbrahim Kanouche===---------------------------------------------=== 545*46c4c49dSIbrahim Kanouche`, 546*46c4c49dSIbrahim Kanouche want: ` This is the first line we want This is the second line we want This is the third line we want This is the last line we want `, 547*46c4c49dSIbrahim Kanouche }, 548*46c4c49dSIbrahim Kanouche { 549*46c4c49dSIbrahim Kanouche original: strings.Repeat("-", 80), 550*46c4c49dSIbrahim Kanouche want: " ", 551*46c4c49dSIbrahim Kanouche }, 552*46c4c49dSIbrahim Kanouche { 553*46c4c49dSIbrahim Kanouche original: strings.Repeat("=", 80), 554*46c4c49dSIbrahim Kanouche want: " ", 555*46c4c49dSIbrahim Kanouche }, 556*46c4c49dSIbrahim Kanouche { 557*46c4c49dSIbrahim Kanouche original: "/*\n", 558*46c4c49dSIbrahim Kanouche want: " ", 559*46c4c49dSIbrahim Kanouche }, 560*46c4c49dSIbrahim Kanouche { 561*46c4c49dSIbrahim Kanouche original: "/*\n * precursor text\n */\n", 562*46c4c49dSIbrahim Kanouche want: " precursor text ", 563*46c4c49dSIbrahim Kanouche }, 564*46c4c49dSIbrahim Kanouche // Test for b/63540492. 565*46c4c49dSIbrahim Kanouche { 566*46c4c49dSIbrahim Kanouche original: " */\n", 567*46c4c49dSIbrahim Kanouche want: " ", 568*46c4c49dSIbrahim Kanouche }, 569*46c4c49dSIbrahim Kanouche { 570*46c4c49dSIbrahim Kanouche original: "", 571*46c4c49dSIbrahim Kanouche want: "", 572*46c4c49dSIbrahim Kanouche }, 573*46c4c49dSIbrahim Kanouche } 574*46c4c49dSIbrahim Kanouche 575*46c4c49dSIbrahim Kanouche for _, tt := range tests { 576*46c4c49dSIbrahim Kanouche if got := stringclassifier.FlattenWhitespace(RemoveNonWords(tt.original)); got != tt.want { 577*46c4c49dSIbrahim Kanouche t.Errorf("Mismatch(%q) => %v, want %v", tt.original, got, tt.want) 578*46c4c49dSIbrahim Kanouche } 579*46c4c49dSIbrahim Kanouche } 580*46c4c49dSIbrahim Kanouche} 581*46c4c49dSIbrahim Kanouche 582*46c4c49dSIbrahim Kanouchefunc TestNormalizePunctuation(t *testing.T) { 583*46c4c49dSIbrahim Kanouche tests := []struct { 584*46c4c49dSIbrahim Kanouche original string 585*46c4c49dSIbrahim Kanouche want string 586*46c4c49dSIbrahim Kanouche }{ 587*46c4c49dSIbrahim Kanouche // Hyphens and dashes. 588*46c4c49dSIbrahim Kanouche {"—", "-"}, 589*46c4c49dSIbrahim Kanouche {"-", "-"}, 590*46c4c49dSIbrahim Kanouche {"‒", "-"}, 591*46c4c49dSIbrahim Kanouche {"–", "-"}, 592*46c4c49dSIbrahim Kanouche {"—", "-"}, 593*46c4c49dSIbrahim Kanouche 594*46c4c49dSIbrahim Kanouche // Quotes. 595*46c4c49dSIbrahim Kanouche {"'", "'"}, 596*46c4c49dSIbrahim Kanouche {`"`, "'"}, 597*46c4c49dSIbrahim Kanouche {"‘", "'"}, 598*46c4c49dSIbrahim Kanouche {"’", "'"}, 599*46c4c49dSIbrahim Kanouche {"“", "'"}, 600*46c4c49dSIbrahim Kanouche {"”", "'"}, 601*46c4c49dSIbrahim Kanouche {" ” ", " ' "}, 602*46c4c49dSIbrahim Kanouche 603*46c4c49dSIbrahim Kanouche // Backtick. 604*46c4c49dSIbrahim Kanouche {"`", "'"}, 605*46c4c49dSIbrahim Kanouche 606*46c4c49dSIbrahim Kanouche // Copyright mark. 607*46c4c49dSIbrahim Kanouche {"©", "(c)"}, 608*46c4c49dSIbrahim Kanouche 609*46c4c49dSIbrahim Kanouche // Hyphen-separated words. 610*46c4c49dSIbrahim Kanouche {"general- purpose, non- compliant", "general-purpose, non-compliant"}, 611*46c4c49dSIbrahim Kanouche 612*46c4c49dSIbrahim Kanouche // Section. 613*46c4c49dSIbrahim Kanouche {"§", "(s)"}, 614*46c4c49dSIbrahim Kanouche {"¤", "(s)"}, 615*46c4c49dSIbrahim Kanouche } 616*46c4c49dSIbrahim Kanouche 617*46c4c49dSIbrahim Kanouche for _, tt := range tests { 618*46c4c49dSIbrahim Kanouche if got := NormalizePunctuation(tt.original); got != tt.want { 619*46c4c49dSIbrahim Kanouche t.Errorf("Mismatch => %v, want %v", got, tt.want) 620*46c4c49dSIbrahim Kanouche } 621*46c4c49dSIbrahim Kanouche } 622*46c4c49dSIbrahim Kanouche} 623*46c4c49dSIbrahim Kanouche 624*46c4c49dSIbrahim Kanouchefunc TestNormalizeEquivalentWords(t *testing.T) { 625*46c4c49dSIbrahim Kanouche tests := []struct { 626*46c4c49dSIbrahim Kanouche original string 627*46c4c49dSIbrahim Kanouche want string 628*46c4c49dSIbrahim Kanouche }{ 629*46c4c49dSIbrahim Kanouche {"acknowledgment", "Acknowledgement"}, 630*46c4c49dSIbrahim Kanouche {"ANalogue", "Analog"}, 631*46c4c49dSIbrahim Kanouche {"AnAlyse", "Analyze"}, 632*46c4c49dSIbrahim Kanouche {"ArtefacT", "Artifact"}, 633*46c4c49dSIbrahim Kanouche {"authorisation", "Authorization"}, 634*46c4c49dSIbrahim Kanouche {"AuthoriSed", "Authorized"}, 635*46c4c49dSIbrahim Kanouche {"CalIbre", "Caliber"}, 636*46c4c49dSIbrahim Kanouche {"CanCelled", "Canceled"}, 637*46c4c49dSIbrahim Kanouche {"CapitaliSations", "Capitalizations"}, 638*46c4c49dSIbrahim Kanouche {"CatalogUe", "Catalog"}, 639*46c4c49dSIbrahim Kanouche {"CategoriSe", "Categorize"}, 640*46c4c49dSIbrahim Kanouche {"CentRE", "Center"}, 641*46c4c49dSIbrahim Kanouche {"EmphasiSed", "Emphasized"}, 642*46c4c49dSIbrahim Kanouche {"FavoUr", "Favor"}, 643*46c4c49dSIbrahim Kanouche {"FavoUrite", "Favorite"}, 644*46c4c49dSIbrahim Kanouche {"FulfiL", "Fulfill"}, 645*46c4c49dSIbrahim Kanouche {"FulfiLment", "Fulfillment"}, 646*46c4c49dSIbrahim Kanouche {"InitialiSe", "Initialize"}, 647*46c4c49dSIbrahim Kanouche {"JudGMent", "Judgement"}, 648*46c4c49dSIbrahim Kanouche {"LabelLing", "Labeling"}, 649*46c4c49dSIbrahim Kanouche {"LaboUr", "Labor"}, 650*46c4c49dSIbrahim Kanouche {"LicenCe", "License"}, 651*46c4c49dSIbrahim Kanouche {"MaximiSe", "Maximize"}, 652*46c4c49dSIbrahim Kanouche {"ModelLed", "Modeled"}, 653*46c4c49dSIbrahim Kanouche {"ModeLling", "Modeling"}, 654*46c4c49dSIbrahim Kanouche {"OffenCe", "Offense"}, 655*46c4c49dSIbrahim Kanouche {"OptimiSe", "Optimize"}, 656*46c4c49dSIbrahim Kanouche {"OrganiSation", "Organization"}, 657*46c4c49dSIbrahim Kanouche {"OrganiSe", "Organize"}, 658*46c4c49dSIbrahim Kanouche {"PractiSe", "Practice"}, 659*46c4c49dSIbrahim Kanouche {"ProgramME", "Program"}, 660*46c4c49dSIbrahim Kanouche {"RealiSe", "Realize"}, 661*46c4c49dSIbrahim Kanouche {"RecogniSe", "Recognize"}, 662*46c4c49dSIbrahim Kanouche {"SignalLing", "Signaling"}, 663*46c4c49dSIbrahim Kanouche {"sub-license", "Sublicense"}, 664*46c4c49dSIbrahim Kanouche {"sub license", "Sublicense"}, 665*46c4c49dSIbrahim Kanouche {"UtiliSation", "Utilization"}, 666*46c4c49dSIbrahim Kanouche {"WhilST", "While"}, 667*46c4c49dSIbrahim Kanouche {"WilfuL", "Wilfull"}, 668*46c4c49dSIbrahim Kanouche {"Non-coMMercial", "Noncommercial"}, 669*46c4c49dSIbrahim Kanouche {"Per Cent", "Percent"}, 670*46c4c49dSIbrahim Kanouche } 671*46c4c49dSIbrahim Kanouche 672*46c4c49dSIbrahim Kanouche for _, tt := range tests { 673*46c4c49dSIbrahim Kanouche if got := NormalizeEquivalentWords(tt.original); got != tt.want { 674*46c4c49dSIbrahim Kanouche t.Errorf("Mismatch => %v, want %v", got, tt.want) 675*46c4c49dSIbrahim Kanouche } 676*46c4c49dSIbrahim Kanouche } 677*46c4c49dSIbrahim Kanouche} 678*46c4c49dSIbrahim Kanouche 679*46c4c49dSIbrahim Kanouchefunc TestTrimExtraneousTrailingText(t *testing.T) { 680*46c4c49dSIbrahim Kanouche tests := []struct { 681*46c4c49dSIbrahim Kanouche original string 682*46c4c49dSIbrahim Kanouche want string 683*46c4c49dSIbrahim Kanouche }{ 684*46c4c49dSIbrahim Kanouche { 685*46c4c49dSIbrahim Kanouche original: `12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL 686*46c4c49dSIbrahim Kanouche ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE 687*46c4c49dSIbrahim Kanouche THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 688*46c4c49dSIbrahim Kanouche GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 689*46c4c49dSIbrahim Kanouche USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 690*46c4c49dSIbrahim Kanouche DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 691*46c4c49dSIbrahim Kanouche PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 692*46c4c49dSIbrahim Kanouche EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 693*46c4c49dSIbrahim Kanouche SUCH DAMAGES. 694*46c4c49dSIbrahim Kanouche 695*46c4c49dSIbrahim Kanouche END OF TERMS AND CONDITIONS 696*46c4c49dSIbrahim Kanouche 697*46c4c49dSIbrahim Kanouche How to Apply These Terms to Your New Programs 698*46c4c49dSIbrahim Kanouche 699*46c4c49dSIbrahim Kanouche If you develop a new program, and you want it to be of the greatest 700*46c4c49dSIbrahim Kanouche possible use to the public, the best way to achieve this is to make it free 701*46c4c49dSIbrahim Kanouche software which everyone can redistribute and change under these terms. 702*46c4c49dSIbrahim Kanouche`, 703*46c4c49dSIbrahim Kanouche want: `12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL 704*46c4c49dSIbrahim Kanouche ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE 705*46c4c49dSIbrahim Kanouche THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 706*46c4c49dSIbrahim Kanouche GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 707*46c4c49dSIbrahim Kanouche USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 708*46c4c49dSIbrahim Kanouche DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 709*46c4c49dSIbrahim Kanouche PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 710*46c4c49dSIbrahim Kanouche EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 711*46c4c49dSIbrahim Kanouche SUCH DAMAGES. 712*46c4c49dSIbrahim Kanouche 713*46c4c49dSIbrahim Kanouche END OF TERMS AND CONDITIONS`, 714*46c4c49dSIbrahim Kanouche }, 715*46c4c49dSIbrahim Kanouche } 716*46c4c49dSIbrahim Kanouche 717*46c4c49dSIbrahim Kanouche for _, tt := range tests { 718*46c4c49dSIbrahim Kanouche if got := TrimExtraneousTrailingText(tt.original); got != tt.want { 719*46c4c49dSIbrahim Kanouche t.Errorf("Mismatch => %q, want %q", got, tt.want) 720*46c4c49dSIbrahim Kanouche } 721*46c4c49dSIbrahim Kanouche } 722*46c4c49dSIbrahim Kanouche} 723*46c4c49dSIbrahim Kanouche 724*46c4c49dSIbrahim Kanouchefunc TestCommonLicenseWords(t *testing.T) { 725*46c4c49dSIbrahim Kanouche files, err := ReadLicenseDir() 726*46c4c49dSIbrahim Kanouche if err != nil { 727*46c4c49dSIbrahim Kanouche t.Fatalf("error: cannot read licenses directory: %v", err) 728*46c4c49dSIbrahim Kanouche } 729*46c4c49dSIbrahim Kanouche if files == nil { 730*46c4c49dSIbrahim Kanouche t.Fatal("error: cannot get licenses from license directory") 731*46c4c49dSIbrahim Kanouche } 732*46c4c49dSIbrahim Kanouche 733*46c4c49dSIbrahim Kanouche for _, file := range files { 734*46c4c49dSIbrahim Kanouche if filepath.Ext(file.Name()) != ".txt" { 735*46c4c49dSIbrahim Kanouche continue 736*46c4c49dSIbrahim Kanouche } 737*46c4c49dSIbrahim Kanouche text, err := ReadLicenseFile(file.Name()) 738*46c4c49dSIbrahim Kanouche if err != nil { 739*46c4c49dSIbrahim Kanouche t.Fatalf("error reading contents of %q: %v", file.Name(), err) 740*46c4c49dSIbrahim Kanouche } 741*46c4c49dSIbrahim Kanouche 742*46c4c49dSIbrahim Kanouche if got := classifier.hasCommonLicenseWords(string(text)); !got { 743*46c4c49dSIbrahim Kanouche t.Errorf("Mismatch(%q) => false, want true", file.Name()) 744*46c4c49dSIbrahim Kanouche } 745*46c4c49dSIbrahim Kanouche } 746*46c4c49dSIbrahim Kanouche 747*46c4c49dSIbrahim Kanouche text := strings.Repeat("Þetta er ekki leyfi.\n", 80) 748*46c4c49dSIbrahim Kanouche if got := classifier.hasCommonLicenseWords(text); got { 749*46c4c49dSIbrahim Kanouche t.Error("Mismatch => true, want false") 750*46c4c49dSIbrahim Kanouche } 751*46c4c49dSIbrahim Kanouche} 752*46c4c49dSIbrahim Kanouche 753*46c4c49dSIbrahim Kanouchefunc TestLicenseMatchQuality(t *testing.T) { 754*46c4c49dSIbrahim Kanouche files, err := ReadLicenseDir() 755*46c4c49dSIbrahim Kanouche if err != nil { 756*46c4c49dSIbrahim Kanouche t.Fatalf("error: cannot read licenses directory: %v", err) 757*46c4c49dSIbrahim Kanouche } 758*46c4c49dSIbrahim Kanouche 759*46c4c49dSIbrahim Kanouche classifier.Threshold = 1.0 760*46c4c49dSIbrahim Kanouche defer func() { 761*46c4c49dSIbrahim Kanouche classifier.Threshold = DefaultConfidenceThreshold 762*46c4c49dSIbrahim Kanouche }() 763*46c4c49dSIbrahim Kanouche for _, file := range files { 764*46c4c49dSIbrahim Kanouche if filepath.Ext(file.Name()) != ".txt" { 765*46c4c49dSIbrahim Kanouche continue 766*46c4c49dSIbrahim Kanouche } 767*46c4c49dSIbrahim Kanouche name := strings.TrimSuffix(file.Name(), ".txt") 768*46c4c49dSIbrahim Kanouche 769*46c4c49dSIbrahim Kanouche contents, err := ReadLicenseFile(file.Name()) 770*46c4c49dSIbrahim Kanouche if err != nil { 771*46c4c49dSIbrahim Kanouche t.Fatalf("error reading contents of %q: %v", file.Name(), err) 772*46c4c49dSIbrahim Kanouche } 773*46c4c49dSIbrahim Kanouche 774*46c4c49dSIbrahim Kanouche m := classifier.NearestMatch(TrimExtraneousTrailingText(string(contents))) 775*46c4c49dSIbrahim Kanouche if m == nil { 776*46c4c49dSIbrahim Kanouche t.Errorf("Couldn't match %q", name) 777*46c4c49dSIbrahim Kanouche continue 778*46c4c49dSIbrahim Kanouche } 779*46c4c49dSIbrahim Kanouche 780*46c4c49dSIbrahim Kanouche if !classifier.WithinConfidenceThreshold(m.Confidence) { 781*46c4c49dSIbrahim Kanouche t.Errorf("ConfidenceMatch(%q) => %v, want %v", name, m.Confidence, 0.99) 782*46c4c49dSIbrahim Kanouche } 783*46c4c49dSIbrahim Kanouche want := strings.TrimSuffix(name, ".header") 784*46c4c49dSIbrahim Kanouche if want != m.Name { 785*46c4c49dSIbrahim Kanouche t.Errorf("LicenseMatch(%q) => %v, want %v", name, m.Name, want) 786*46c4c49dSIbrahim Kanouche } 787*46c4c49dSIbrahim Kanouche } 788*46c4c49dSIbrahim Kanouche} 789*46c4c49dSIbrahim Kanouche 790*46c4c49dSIbrahim Kanouchefunc BenchmarkClassifier(b *testing.B) { 791*46c4c49dSIbrahim Kanouche contents := apache20[:len(apache20)/2] + "hello" + apache20[len(apache20)/2:] 792*46c4c49dSIbrahim Kanouche 793*46c4c49dSIbrahim Kanouche b.ResetTimer() 794*46c4c49dSIbrahim Kanouche for i := 0; i < b.N; i++ { 795*46c4c49dSIbrahim Kanouche classifier, err := New(DefaultConfidenceThreshold) 796*46c4c49dSIbrahim Kanouche if err != nil { 797*46c4c49dSIbrahim Kanouche b.Errorf("Cannot create classifier: %v", err) 798*46c4c49dSIbrahim Kanouche continue 799*46c4c49dSIbrahim Kanouche } 800*46c4c49dSIbrahim Kanouche classifier.NearestMatch(contents) 801*46c4c49dSIbrahim Kanouche } 802*46c4c49dSIbrahim Kanouche} 803*46c4c49dSIbrahim Kanouche 804*46c4c49dSIbrahim Kanouchefunc TestNew(t *testing.T) { 805*46c4c49dSIbrahim Kanouche tests := []struct { 806*46c4c49dSIbrahim Kanouche desc string 807*46c4c49dSIbrahim Kanouche options []OptionFunc 808*46c4c49dSIbrahim Kanouche wantArchive func() []byte 809*46c4c49dSIbrahim Kanouche wantErr bool 810*46c4c49dSIbrahim Kanouche }{ 811*46c4c49dSIbrahim Kanouche { 812*46c4c49dSIbrahim Kanouche desc: "no options, use default", 813*46c4c49dSIbrahim Kanouche options: []OptionFunc{}, 814*46c4c49dSIbrahim Kanouche wantArchive: nil, 815*46c4c49dSIbrahim Kanouche }, 816*46c4c49dSIbrahim Kanouche { 817*46c4c49dSIbrahim Kanouche desc: "specify ForbiddenLicenseArchive", 818*46c4c49dSIbrahim Kanouche options: []OptionFunc{Archive(ForbiddenLicenseArchive)}, 819*46c4c49dSIbrahim Kanouche wantArchive: func() []byte { 820*46c4c49dSIbrahim Kanouche b, _ := ReadLicenseFile(ForbiddenLicenseArchive) 821*46c4c49dSIbrahim Kanouche return b 822*46c4c49dSIbrahim Kanouche }, 823*46c4c49dSIbrahim Kanouche }, 824*46c4c49dSIbrahim Kanouche { 825*46c4c49dSIbrahim Kanouche desc: "file doesn't exist results in error", 826*46c4c49dSIbrahim Kanouche options: []OptionFunc{Archive("doesnotexist")}, 827*46c4c49dSIbrahim Kanouche wantArchive: func() []byte { return nil }, 828*46c4c49dSIbrahim Kanouche wantErr: true, 829*46c4c49dSIbrahim Kanouche }, 830*46c4c49dSIbrahim Kanouche { 831*46c4c49dSIbrahim Kanouche desc: "raw bytes archive", 832*46c4c49dSIbrahim Kanouche options: []OptionFunc{ArchiveBytes([]byte("not a gzipped file"))}, 833*46c4c49dSIbrahim Kanouche wantArchive: func() []byte { return []byte("not a gzipped file") }, 834*46c4c49dSIbrahim Kanouche wantErr: true, 835*46c4c49dSIbrahim Kanouche }, 836*46c4c49dSIbrahim Kanouche { 837*46c4c49dSIbrahim Kanouche desc: "function archive", 838*46c4c49dSIbrahim Kanouche options: []OptionFunc{ArchiveFunc(func() ([]byte, error) { 839*46c4c49dSIbrahim Kanouche return []byte("not a gzipped file"), nil 840*46c4c49dSIbrahim Kanouche })}, 841*46c4c49dSIbrahim Kanouche wantArchive: func() []byte { return []byte("not a gzipped file") }, 842*46c4c49dSIbrahim Kanouche wantErr: true, 843*46c4c49dSIbrahim Kanouche }, 844*46c4c49dSIbrahim Kanouche } 845*46c4c49dSIbrahim Kanouche for _, tt := range tests { 846*46c4c49dSIbrahim Kanouche t.Run(tt.desc, func(t *testing.T) { 847*46c4c49dSIbrahim Kanouche c, err := New(0.5, tt.options...) 848*46c4c49dSIbrahim Kanouche if tt.wantErr != (err != nil) { 849*46c4c49dSIbrahim Kanouche t.Fatalf("unexpected error: %v", err) 850*46c4c49dSIbrahim Kanouche } 851*46c4c49dSIbrahim Kanouche if err == nil { 852*46c4c49dSIbrahim Kanouche if tt.wantArchive == nil { 853*46c4c49dSIbrahim Kanouche if c.archive != nil { 854*46c4c49dSIbrahim Kanouche t.Errorf("wanted default archive, but got specified archive") 855*46c4c49dSIbrahim Kanouche } 856*46c4c49dSIbrahim Kanouche } else { 857*46c4c49dSIbrahim Kanouche got, _ := c.archive() 858*46c4c49dSIbrahim Kanouche want := tt.wantArchive() 859*46c4c49dSIbrahim Kanouche if !bytes.Equal(got, want) { 860*46c4c49dSIbrahim Kanouche t.Errorf("archives did not match; got %d bytes, wanted %d", len(got), len(want)) 861*46c4c49dSIbrahim Kanouche } 862*46c4c49dSIbrahim Kanouche } 863*46c4c49dSIbrahim Kanouche } 864*46c4c49dSIbrahim Kanouche }) 865*46c4c49dSIbrahim Kanouche } 866*46c4c49dSIbrahim Kanouche 867*46c4c49dSIbrahim Kanouche} 868