1// Copyright 2017 Google Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package licenseclassifier 16 17import ( 18 "bytes" 19 "log" 20 "os" 21 "path/filepath" 22 "strings" 23 "testing" 24 25 "github.com/google/licenseclassifier/stringclassifier" 26) 27 28var ( 29 agpl30, agpl30Header, apache20, bsd3, gpl20, ccbync20 string 30 classifier *License 31) 32 33func TestMain(m *testing.M) { 34 a30, err := ReadLicenseFile("AGPL-3.0.txt") 35 if err != nil { 36 log.Fatalf("error reading contents of AGPL-3.0.txt: %v", err) 37 } 38 a30h, err := ReadLicenseFile("AGPL-3.0.header.txt") 39 if err != nil { 40 log.Fatalf("error reading contents of AGPL-3.0.header.txt: %v", err) 41 } 42 a20, err := ReadLicenseFile("Apache-2.0.txt") 43 if err != nil { 44 log.Fatalf("error reading contents of Apache-2.0.txt: %v", err) 45 } 46 b3, err := ReadLicenseFile("BSD-3-Clause.txt") 47 if err != nil { 48 log.Fatalf("error reading contents of BSD-3-Clause.txt: %v", err) 49 } 50 g2, err := ReadLicenseFile("GPL-2.0.txt") 51 if err != nil { 52 log.Fatalf("error reading contents of GPL-2.0.txt: %v", err) 53 } 54 cc20, err := ReadLicenseFile("CC-BY-NC-2.0.txt") 55 if err != nil { 56 log.Fatalf("error reading contents of CC-BY-NC-2.0.txt: %v", err) 57 } 58 59 agpl30 = TrimExtraneousTrailingText(string(a30)) 60 agpl30Header = TrimExtraneousTrailingText(string(a30h)) 61 apache20 = TrimExtraneousTrailingText(string(a20)) 62 bsd3 = TrimExtraneousTrailingText(string(b3)) 63 gpl20 = TrimExtraneousTrailingText(string(g2)) 64 ccbync20 = TrimExtraneousTrailingText(string(cc20)) 65 66 classifier, err = New(DefaultConfidenceThreshold) 67 if err != nil { 68 log.Fatalf("cannot create license classifier: %v", err) 69 } 70 os.Exit(m.Run()) 71} 72 73func TestClassifier_NearestMatch(t *testing.T) { 74 tests := []struct { 75 description string 76 filename string 77 extraText string 78 wantLicense string 79 wantConfidence float64 80 }{ 81 { 82 description: "AGPL 3.0 license", 83 filename: "AGPL-3.0.txt", 84 wantLicense: "AGPL-3.0", 85 wantConfidence: 1.0, 86 }, 87 { 88 description: "Apache 2.0 license", 89 filename: "Apache-2.0.txt", 90 wantLicense: "Apache-2.0", 91 wantConfidence: 1.0, 92 }, 93 { 94 description: "GPL 2.0 license", 95 filename: "GPL-2.0.txt", 96 wantLicense: "GPL-2.0", 97 wantConfidence: 1.0, 98 }, 99 { 100 description: "BSD 3 Clause license with extra text", 101 filename: "BSD-3-Clause.txt", 102 extraText: "New BSD License\nCopyright © 1998 Yoyodyne, Inc.\n", 103 wantLicense: "BSD-3-Clause", 104 wantConfidence: 0.94, 105 }, 106 } 107 108 classifier.Threshold = DefaultConfidenceThreshold 109 for _, tt := range tests { 110 content, err := ReadLicenseFile(tt.filename) 111 if err != nil { 112 t.Errorf("error reading contents of %q license: %v", tt.wantLicense, err) 113 continue 114 } 115 116 m := classifier.NearestMatch(tt.extraText + TrimExtraneousTrailingText(string(content))) 117 if got, want := m.Name, tt.wantLicense; got != want { 118 t.Errorf("NearestMatch(%q) = %q, want %q", tt.description, got, want) 119 } 120 if got, want := m.Confidence, tt.wantConfidence; got < want { 121 t.Errorf("NearestMatch(%q) = %v, want %v", tt.description, got, want) 122 } 123 } 124} 125 126func TestClassifier_MultipleMatch(t *testing.T) { 127 tests := []struct { 128 description string 129 text string 130 want stringclassifier.Matches 131 }{ 132 { 133 description: "Two licenses", 134 text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + apache20 + strings.Repeat("-", 80) + "\n" + bsd3, 135 want: stringclassifier.Matches{ 136 { 137 Name: "Apache-2.0", 138 Confidence: 1.0, 139 }, 140 { 141 Name: "BSD-3-Clause", 142 Confidence: 1.0, 143 }, 144 }, 145 }, 146 { 147 description: "Two licenses: partial match", 148 text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + 149 string(apache20[:len(apache20)/2-1]) + string(apache20[len(apache20)/2+7:]) + strings.Repeat("-", 80) + "\n" + 150 string(bsd3[:len(bsd3)/2]) + "intervening stuff" + string(bsd3[len(bsd3)/2:]), 151 want: stringclassifier.Matches{ 152 { 153 Name: "Apache-2.0", 154 Confidence: 0.99, 155 }, 156 { 157 Name: "BSD-3-Clause", 158 Confidence: 0.98, 159 }, 160 }, 161 }, 162 { 163 description: "Two licenses: one forbidden the other okay", 164 text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + apache20 + strings.Repeat("-", 80) + "\n" + ccbync20, 165 want: stringclassifier.Matches{ 166 { 167 Name: "Apache-2.0", 168 Confidence: 0.99, 169 }, 170 { 171 Name: "CC-BY-NC-2.0", 172 Confidence: 1.0, 173 }, 174 }, 175 }, 176 { 177 description: "Two licenses without any space between them.", 178 text: apache20 + "." + bsd3, 179 want: stringclassifier.Matches{ 180 { 181 Name: "Apache-2.0", 182 Confidence: 1.0, 183 }, 184 { 185 Name: "BSD-3-Clause", 186 Confidence: 1.0, 187 }, 188 }, 189 }, 190 } 191 192 classifier.Threshold = 0.95 193 defer func() { 194 classifier.Threshold = DefaultConfidenceThreshold 195 }() 196 for _, tt := range tests { 197 m := classifier.MultipleMatch(tt.text, false) 198 if len(m) != len(tt.want) { 199 t.Fatalf("MultipleMatch(%q) number matches: %v, want %v", tt.description, len(m), len(tt.want)) 200 continue 201 } 202 203 for i := 0; i < len(m); i++ { 204 w := tt.want[i] 205 if got, want := m[i].Name, w.Name; got != want { 206 t.Errorf("MultipleMatch(%q) = %q, want %q", tt.description, got, want) 207 } 208 if got, want := m[i].Confidence, w.Confidence; got < want { 209 t.Errorf("MultipleMatch(%q) = %v, want %v", tt.description, got, want) 210 } 211 } 212 } 213} 214 215func TestClassifier_MultipleMatch_Headers(t *testing.T) { 216 tests := []struct { 217 description string 218 text string 219 want stringclassifier.Matches 220 }{ 221 { 222 description: "AGPL-3.0 header", 223 text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + agpl30Header, 224 want: stringclassifier.Matches{ 225 { 226 Name: "AGPL-3.0", 227 Confidence: 1.0, 228 Offset: 0, 229 }, 230 }, 231 }, 232 { 233 description: "Modified LGPL-2.1 header", 234 text: `Common Widget code. 235 236Copyright (C) 2013-2015 Yoyodyne, Inc. 237 238This library is free software; you can redistribute it and/or 239modify it under the terms of the GNU Lesser General Public 240License as published by the Free Software Foundation; either 241version 2.1 of the License, or (at your option) any later version (but not!). 242 243This library is distributed in the hope that it will be useful, 244but WITHOUT ANY WARRANTY; without even the implied warranty of 245MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 246Lesser General Public License for more details. 247 248You should have received a copy of the GNU Lesser General Public 249License along with this library; if not, write to the Free Software 250Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 251`, 252 want: stringclassifier.Matches{ 253 { 254 Name: "LGPL-2.1", 255 Confidence: 0.97, 256 Offset: 197, 257 }, 258 }, 259 }, 260 } 261 262 classifier.Threshold = 0.90 263 defer func() { 264 classifier.Threshold = DefaultConfidenceThreshold 265 }() 266 for _, tt := range tests { 267 m := classifier.MultipleMatch(tt.text, true) 268 if len(m) != len(tt.want) { 269 t.Errorf("MultipleMatch(%q) number matches: %v, want %v", tt.description, len(m), len(tt.want)) 270 continue 271 } 272 273 for i := 0; i < len(m); i++ { 274 w := tt.want[i] 275 if got, want := m[i].Name, w.Name; got != want { 276 t.Errorf("MultipleMatch(%q) = %q, want %q", tt.description, got, want) 277 } 278 if got, want := m[i].Confidence, w.Confidence; got < want { 279 t.Errorf("MultipleMatch(%q) = %v, want %v", tt.description, got, want) 280 } 281 } 282 } 283} 284 285func TestClassifier_CopyrightHolder(t *testing.T) { 286 tests := []struct { 287 copyright string 288 want string 289 }{ 290 { 291 copyright: "Copyright 2008 Yoyodyne Inc. All Rights Reserved.", 292 want: "Yoyodyne Inc.", 293 }, 294 { 295 copyright: "Copyright 2010-2016 Yoyodyne, Inc.", 296 want: "Yoyodyne, Inc.", 297 }, 298 { 299 copyright: "Copyright 2010, 2011, 2012 Yoyodyne, Inc., All rights reserved.", 300 want: "Yoyodyne, Inc.", 301 }, 302 { 303 copyright: "Copyright (c) 2015 Yoyodyne, Inc. All rights reserved.", 304 want: "Yoyodyne, Inc.", 305 }, 306 { 307 copyright: "Copyright © 1998 by Yoyodyne, Inc., San Narciso, CA, US.", 308 want: "Yoyodyne, Inc., San Narciso, CA, US", 309 }, 310 { 311 copyright: "Copyright (c) 2015 The Algonquin Round Table. All rights reserved.", 312 want: "The Algonquin Round Table", 313 }, 314 { 315 copyright: "Copyright 2016, The Android Open Source Project", 316 want: "The Android Open Source Project", 317 }, 318 { 319 copyright: `--------------------------------------------------------- 320foo.c: 321Copyright 2016, The Android Open Source Project 322`, 323 want: "The Android Open Source Project", 324 }, 325 } 326 327 for _, tt := range tests { 328 got := CopyrightHolder(tt.copyright) 329 if got != tt.want { 330 t.Errorf("CopyrightHolder(%q) = %q, want %q", tt.copyright, got, tt.want) 331 } 332 } 333} 334 335func TestClassifier_WithinConfidenceThreshold(t *testing.T) { 336 tests := []struct { 337 description string 338 text string 339 confDef bool 340 conf99 bool 341 conf93 bool 342 conf5 bool 343 }{ 344 { 345 description: "Apache 2.0", 346 text: apache20, 347 confDef: true, 348 conf99: true, 349 conf93: true, 350 conf5: true, 351 }, 352 { 353 description: "GPL 2.0", 354 text: gpl20, 355 confDef: true, 356 conf99: true, 357 conf93: true, 358 conf5: true, 359 }, 360 { 361 description: "BSD 3 Clause license with extra text", 362 text: "New BSD License\nCopyright © 1998 Yoyodyne, Inc.\n" + bsd3, 363 confDef: true, 364 conf99: true, 365 conf93: true, 366 conf5: true, 367 }, 368 { 369 description: "Very low confidence", 370 text: strings.Repeat("Random text is random, but not a license\n", 40), 371 confDef: false, 372 conf99: false, 373 conf93: false, 374 conf5: true, 375 }, 376 } 377 378 defer func() { 379 classifier.Threshold = DefaultConfidenceThreshold 380 }() 381 for _, tt := range tests { 382 t.Run(tt.description, func(t *testing.T) { 383 classifier.Threshold = DefaultConfidenceThreshold 384 m := classifier.NearestMatch(tt.text) 385 if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.confDef { 386 t.Errorf("WithinConfidenceThreshold() at %v returned wrong result; got %v, want %v", classifier.Threshold, got, tt.confDef) 387 } 388 389 classifier.Threshold = 0.99 390 m = classifier.NearestMatch(tt.text) 391 if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.conf99 { 392 t.Errorf("WithinConfidenceThreshold(%q) = %v, want %v", tt.description, got, tt.conf99) 393 } 394 395 classifier.Threshold = 0.93 396 m = classifier.NearestMatch(tt.text) 397 if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.conf93 { 398 t.Errorf("WithinConfidenceThreshold(%q) = %v, want %v", tt.description, got, tt.conf93) 399 } 400 401 classifier.Threshold = 0.05 402 m = classifier.NearestMatch(tt.text) 403 if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.conf5 { 404 t.Errorf("WithinConfidenceThreshold(%q) = %v, want %v", tt.description, got, tt.conf5) 405 } 406 }) 407 } 408} 409 410func TestRemoveIgnorableText(t *testing.T) { 411 const want = `Lorem ipsum dolor sit amet, pellentesque wisi tortor duis, amet adipiscing bibendum elit aliquam 412leo. Mattis commodo sed accumsan at in. 413` 414 415 tests := []struct { 416 original string 417 want string 418 }{ 419 {"MIT License\n", "\n"}, 420 {"The MIT License\n", "\n"}, 421 {"The MIT License (MIT)\n", "\n"}, 422 {"BSD License\n", "\n"}, 423 {"New BSD License\n", "\n"}, 424 {"COPYRIGHT AND PERMISSION NOTICE\n", "\n"}, 425 {"Copyright (c) 2016, Yoyodyne, Inc.\n", "\n"}, 426 {"All rights reserved.\n", "\n"}, 427 {"Some rights reserved.\n", "\n"}, 428 {"@license\n", "\n"}, 429 430 // Now with wanted texts. 431 { 432 original: `The MIT License 433 434Copyright (c) 2016, Yoyodyne, Inc. 435All rights reserved. 436` + want, 437 want: strings.ToLower(want), 438 }, 439 } 440 441 for _, tt := range tests { 442 if got := removeIgnorableTexts(strings.ToLower(tt.original)); got != tt.want { 443 t.Errorf("Mismatch(%q) =>\n%s\nwant:\n%s", tt.original, got, tt.want) 444 } 445 } 446} 447 448func TestRemoveShebangLine(t *testing.T) { 449 tests := []struct { 450 original string 451 want string 452 }{ 453 { 454 original: "", 455 want: "", 456 }, 457 { 458 original: "#!/usr/bin/env python -C", 459 want: "#!/usr/bin/env python -C", 460 }, 461 { 462 original: `#!/usr/bin/env python -C 463# First line of license text. 464# Second line of license text. 465`, 466 want: `# First line of license text. 467# Second line of license text. 468`, 469 }, 470 { 471 original: `# First line of license text. 472# Second line of license text. 473`, 474 want: `# First line of license text. 475# Second line of license text. 476`, 477 }, 478 } 479 480 for _, tt := range tests { 481 got := removeShebangLine(tt.original) 482 if got != tt.want { 483 t.Errorf("RemoveShebangLine(%q) =>\n%s\nwant:\n%s", tt.original, got, tt.want) 484 } 485 } 486} 487 488func TestRemoveNonWords(t *testing.T) { 489 tests := []struct { 490 original string 491 want string 492 }{ 493 { 494 original: `# # Hello 495## World 496`, 497 want: ` Hello World `, 498 }, 499 { 500 original: ` * This text has a bulleted list: 501 * * item 1 502 * * item 2`, 503 want: ` This text has a bulleted list item 1 item 2`, 504 }, 505 { 506 original: ` 507 508 * This text has a bulleted list: 509 * * item 1 510 * * item 2`, 511 want: ` This text has a bulleted list item 1 item 2`, 512 }, 513 { 514 original: `// This text has a bulleted list: 515// 1. item 1 516// 2. item 2`, 517 want: ` This text has a bulleted list 1 item 1 2 item 2`, 518 }, 519 { 520 original: `// «Copyright (c) 1998 Yoyodyne, Inc.» 521// This text has a bulleted list: 522// 1. item 1 523// 2. item 2 524`, 525 want: ` «Copyright c 1998 Yoyodyne Inc » This text has a bulleted list 1 item 1 2 item 2 `, 526 }, 527 { 528 original: `* 529 * This is the first line we want. 530 * This is the second line we want. 531 * This is the third line we want. 532 * This is the last line we want. 533`, 534 want: ` This is the first line we want This is the second line we want This is the third line we want This is the last line we want `, 535 }, 536 { 537 original: `===---------------------------------------------=== 538*** 539* This is the first line we want. 540* This is the second line we want. 541* This is the third line we want. 542* This is the last line we want. 543*** 544===---------------------------------------------=== 545`, 546 want: ` This is the first line we want This is the second line we want This is the third line we want This is the last line we want `, 547 }, 548 { 549 original: strings.Repeat("-", 80), 550 want: " ", 551 }, 552 { 553 original: strings.Repeat("=", 80), 554 want: " ", 555 }, 556 { 557 original: "/*\n", 558 want: " ", 559 }, 560 { 561 original: "/*\n * precursor text\n */\n", 562 want: " precursor text ", 563 }, 564 // Test for b/63540492. 565 { 566 original: " */\n", 567 want: " ", 568 }, 569 { 570 original: "", 571 want: "", 572 }, 573 } 574 575 for _, tt := range tests { 576 if got := stringclassifier.FlattenWhitespace(RemoveNonWords(tt.original)); got != tt.want { 577 t.Errorf("Mismatch(%q) => %v, want %v", tt.original, got, tt.want) 578 } 579 } 580} 581 582func TestNormalizePunctuation(t *testing.T) { 583 tests := []struct { 584 original string 585 want string 586 }{ 587 // Hyphens and dashes. 588 {"—", "-"}, 589 {"-", "-"}, 590 {"‒", "-"}, 591 {"–", "-"}, 592 {"—", "-"}, 593 594 // Quotes. 595 {"'", "'"}, 596 {`"`, "'"}, 597 {"‘", "'"}, 598 {"’", "'"}, 599 {"“", "'"}, 600 {"”", "'"}, 601 {" ” ", " ' "}, 602 603 // Backtick. 604 {"`", "'"}, 605 606 // Copyright mark. 607 {"©", "(c)"}, 608 609 // Hyphen-separated words. 610 {"general- purpose, non- compliant", "general-purpose, non-compliant"}, 611 612 // Section. 613 {"§", "(s)"}, 614 {"¤", "(s)"}, 615 } 616 617 for _, tt := range tests { 618 if got := NormalizePunctuation(tt.original); got != tt.want { 619 t.Errorf("Mismatch => %v, want %v", got, tt.want) 620 } 621 } 622} 623 624func TestNormalizeEquivalentWords(t *testing.T) { 625 tests := []struct { 626 original string 627 want string 628 }{ 629 {"acknowledgment", "Acknowledgement"}, 630 {"ANalogue", "Analog"}, 631 {"AnAlyse", "Analyze"}, 632 {"ArtefacT", "Artifact"}, 633 {"authorisation", "Authorization"}, 634 {"AuthoriSed", "Authorized"}, 635 {"CalIbre", "Caliber"}, 636 {"CanCelled", "Canceled"}, 637 {"CapitaliSations", "Capitalizations"}, 638 {"CatalogUe", "Catalog"}, 639 {"CategoriSe", "Categorize"}, 640 {"CentRE", "Center"}, 641 {"EmphasiSed", "Emphasized"}, 642 {"FavoUr", "Favor"}, 643 {"FavoUrite", "Favorite"}, 644 {"FulfiL", "Fulfill"}, 645 {"FulfiLment", "Fulfillment"}, 646 {"InitialiSe", "Initialize"}, 647 {"JudGMent", "Judgement"}, 648 {"LabelLing", "Labeling"}, 649 {"LaboUr", "Labor"}, 650 {"LicenCe", "License"}, 651 {"MaximiSe", "Maximize"}, 652 {"ModelLed", "Modeled"}, 653 {"ModeLling", "Modeling"}, 654 {"OffenCe", "Offense"}, 655 {"OptimiSe", "Optimize"}, 656 {"OrganiSation", "Organization"}, 657 {"OrganiSe", "Organize"}, 658 {"PractiSe", "Practice"}, 659 {"ProgramME", "Program"}, 660 {"RealiSe", "Realize"}, 661 {"RecogniSe", "Recognize"}, 662 {"SignalLing", "Signaling"}, 663 {"sub-license", "Sublicense"}, 664 {"sub license", "Sublicense"}, 665 {"UtiliSation", "Utilization"}, 666 {"WhilST", "While"}, 667 {"WilfuL", "Wilfull"}, 668 {"Non-coMMercial", "Noncommercial"}, 669 {"Per Cent", "Percent"}, 670 } 671 672 for _, tt := range tests { 673 if got := NormalizeEquivalentWords(tt.original); got != tt.want { 674 t.Errorf("Mismatch => %v, want %v", got, tt.want) 675 } 676 } 677} 678 679func TestTrimExtraneousTrailingText(t *testing.T) { 680 tests := []struct { 681 original string 682 want string 683 }{ 684 { 685 original: `12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL 686 ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE 687 THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 688 GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 689 USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 690 DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 691 PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 692 EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 693 SUCH DAMAGES. 694 695 END OF TERMS AND CONDITIONS 696 697 How to Apply These Terms to Your New Programs 698 699 If you develop a new program, and you want it to be of the greatest 700 possible use to the public, the best way to achieve this is to make it free 701 software which everyone can redistribute and change under these terms. 702`, 703 want: `12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL 704 ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE 705 THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 706 GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 707 USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 708 DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 709 PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 710 EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 711 SUCH DAMAGES. 712 713 END OF TERMS AND CONDITIONS`, 714 }, 715 } 716 717 for _, tt := range tests { 718 if got := TrimExtraneousTrailingText(tt.original); got != tt.want { 719 t.Errorf("Mismatch => %q, want %q", got, tt.want) 720 } 721 } 722} 723 724func TestCommonLicenseWords(t *testing.T) { 725 files, err := ReadLicenseDir() 726 if err != nil { 727 t.Fatalf("error: cannot read licenses directory: %v", err) 728 } 729 if files == nil { 730 t.Fatal("error: cannot get licenses from license directory") 731 } 732 733 for _, file := range files { 734 if filepath.Ext(file.Name()) != ".txt" { 735 continue 736 } 737 text, err := ReadLicenseFile(file.Name()) 738 if err != nil { 739 t.Fatalf("error reading contents of %q: %v", file.Name(), err) 740 } 741 742 if got := classifier.hasCommonLicenseWords(string(text)); !got { 743 t.Errorf("Mismatch(%q) => false, want true", file.Name()) 744 } 745 } 746 747 text := strings.Repeat("Þetta er ekki leyfi.\n", 80) 748 if got := classifier.hasCommonLicenseWords(text); got { 749 t.Error("Mismatch => true, want false") 750 } 751} 752 753func TestLicenseMatchQuality(t *testing.T) { 754 files, err := ReadLicenseDir() 755 if err != nil { 756 t.Fatalf("error: cannot read licenses directory: %v", err) 757 } 758 759 classifier.Threshold = 1.0 760 defer func() { 761 classifier.Threshold = DefaultConfidenceThreshold 762 }() 763 for _, file := range files { 764 if filepath.Ext(file.Name()) != ".txt" { 765 continue 766 } 767 name := strings.TrimSuffix(file.Name(), ".txt") 768 769 contents, err := ReadLicenseFile(file.Name()) 770 if err != nil { 771 t.Fatalf("error reading contents of %q: %v", file.Name(), err) 772 } 773 774 m := classifier.NearestMatch(TrimExtraneousTrailingText(string(contents))) 775 if m == nil { 776 t.Errorf("Couldn't match %q", name) 777 continue 778 } 779 780 if !classifier.WithinConfidenceThreshold(m.Confidence) { 781 t.Errorf("ConfidenceMatch(%q) => %v, want %v", name, m.Confidence, 0.99) 782 } 783 want := strings.TrimSuffix(name, ".header") 784 if want != m.Name { 785 t.Errorf("LicenseMatch(%q) => %v, want %v", name, m.Name, want) 786 } 787 } 788} 789 790func BenchmarkClassifier(b *testing.B) { 791 contents := apache20[:len(apache20)/2] + "hello" + apache20[len(apache20)/2:] 792 793 b.ResetTimer() 794 for i := 0; i < b.N; i++ { 795 classifier, err := New(DefaultConfidenceThreshold) 796 if err != nil { 797 b.Errorf("Cannot create classifier: %v", err) 798 continue 799 } 800 classifier.NearestMatch(contents) 801 } 802} 803 804func TestNew(t *testing.T) { 805 tests := []struct { 806 desc string 807 options []OptionFunc 808 wantArchive func() []byte 809 wantErr bool 810 }{ 811 { 812 desc: "no options, use default", 813 options: []OptionFunc{}, 814 wantArchive: nil, 815 }, 816 { 817 desc: "specify ForbiddenLicenseArchive", 818 options: []OptionFunc{Archive(ForbiddenLicenseArchive)}, 819 wantArchive: func() []byte { 820 b, _ := ReadLicenseFile(ForbiddenLicenseArchive) 821 return b 822 }, 823 }, 824 { 825 desc: "file doesn't exist results in error", 826 options: []OptionFunc{Archive("doesnotexist")}, 827 wantArchive: func() []byte { return nil }, 828 wantErr: true, 829 }, 830 { 831 desc: "raw bytes archive", 832 options: []OptionFunc{ArchiveBytes([]byte("not a gzipped file"))}, 833 wantArchive: func() []byte { return []byte("not a gzipped file") }, 834 wantErr: true, 835 }, 836 { 837 desc: "function archive", 838 options: []OptionFunc{ArchiveFunc(func() ([]byte, error) { 839 return []byte("not a gzipped file"), nil 840 })}, 841 wantArchive: func() []byte { return []byte("not a gzipped file") }, 842 wantErr: true, 843 }, 844 } 845 for _, tt := range tests { 846 t.Run(tt.desc, func(t *testing.T) { 847 c, err := New(0.5, tt.options...) 848 if tt.wantErr != (err != nil) { 849 t.Fatalf("unexpected error: %v", err) 850 } 851 if err == nil { 852 if tt.wantArchive == nil { 853 if c.archive != nil { 854 t.Errorf("wanted default archive, but got specified archive") 855 } 856 } else { 857 got, _ := c.archive() 858 want := tt.wantArchive() 859 if !bytes.Equal(got, want) { 860 t.Errorf("archives did not match; got %d bytes, wanted %d", len(got), len(want)) 861 } 862 } 863 } 864 }) 865 } 866 867} 868