1*ba677afaSXin Li// Package idsearcher is used to search for short-form IDs in files 2*ba677afaSXin Li// within a directory, and to build an SPDX Document containing those 3*ba677afaSXin Li// license findings. 4*ba677afaSXin Li// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 5*ba677afaSXin Lipackage idsearcher 6*ba677afaSXin Li 7*ba677afaSXin Liimport ( 8*ba677afaSXin Li "bufio" 9*ba677afaSXin Li "fmt" 10*ba677afaSXin Li "github.com/spdx/tools-golang/spdx/v2_3" 11*ba677afaSXin Li "os" 12*ba677afaSXin Li "path/filepath" 13*ba677afaSXin Li "regexp" 14*ba677afaSXin Li "sort" 15*ba677afaSXin Li "strings" 16*ba677afaSXin Li 17*ba677afaSXin Li "github.com/spdx/tools-golang/builder" 18*ba677afaSXin Li "github.com/spdx/tools-golang/spdx/v2_1" 19*ba677afaSXin Li "github.com/spdx/tools-golang/spdx/v2_2" 20*ba677afaSXin Li "github.com/spdx/tools-golang/utils" 21*ba677afaSXin Li) 22*ba677afaSXin Li 23*ba677afaSXin Li// ===== 2.1 Searcher functions ===== 24*ba677afaSXin Li 25*ba677afaSXin Li// Config2_1 is a collection of configuration settings for docbuilder 26*ba677afaSXin Li// (for version 2.1 SPDX Documents). A few mandatory fields are set here 27*ba677afaSXin Li// so that they can be repeatedly reused in multiple calls to Build2_1. 28*ba677afaSXin Litype Config2_1 struct { 29*ba677afaSXin Li // NamespacePrefix should be a URI representing a prefix for the 30*ba677afaSXin Li // namespace with which the SPDX Document will be associated. 31*ba677afaSXin Li // It will be used in the DocumentNamespace field in the CreationInfo 32*ba677afaSXin Li // section, followed by the per-Document package name and a random UUID. 33*ba677afaSXin Li NamespacePrefix string 34*ba677afaSXin Li 35*ba677afaSXin Li // BuilderPathsIgnored lists certain paths to be omitted from the built 36*ba677afaSXin Li // document. Each string should be a path, relative to the package's 37*ba677afaSXin Li // dirRoot, to a specific file or (for all files in a directory) ending 38*ba677afaSXin Li // in a slash. Prefix the string with "**" to omit all instances of that 39*ba677afaSXin Li // file / directory, regardless of where it is in the file tree. 40*ba677afaSXin Li BuilderPathsIgnored []string 41*ba677afaSXin Li 42*ba677afaSXin Li // SearcherPathsIgnored lists certain paths that should not be searched 43*ba677afaSXin Li // by idsearcher, even if those paths have Files present. It uses the 44*ba677afaSXin Li // same format as BuilderPathsIgnored. 45*ba677afaSXin Li SearcherPathsIgnored []string 46*ba677afaSXin Li} 47*ba677afaSXin Li 48*ba677afaSXin Li// BuildIDsDocument2_1 creates an SPDX Document (version 2.1) and searches for 49*ba677afaSXin Li// short-form IDs in each file, filling in license fields as appropriate. It 50*ba677afaSXin Li// returns that document or error if any is encountered. Arguments: 51*ba677afaSXin Li// - packageName: name of package / directory 52*ba677afaSXin Li// - dirRoot: path to directory to be analyzed 53*ba677afaSXin Li// - namespacePrefix: URI representing a prefix for the 54*ba677afaSXin Li// namespace with which the SPDX Document will be associated 55*ba677afaSXin Lifunc BuildIDsDocument2_1(packageName string, dirRoot string, idconfig *Config2_1) (*v2_1.Document, error) { 56*ba677afaSXin Li // first, build the Document using builder 57*ba677afaSXin Li bconfig := &builder.Config2_1{ 58*ba677afaSXin Li NamespacePrefix: idconfig.NamespacePrefix, 59*ba677afaSXin Li CreatorType: "Tool", 60*ba677afaSXin Li Creator: "github.com/spdx/tools-golang/idsearcher", 61*ba677afaSXin Li PathsIgnored: idconfig.BuilderPathsIgnored, 62*ba677afaSXin Li } 63*ba677afaSXin Li doc, err := builder.Build2_1(packageName, dirRoot, bconfig) 64*ba677afaSXin Li if err != nil { 65*ba677afaSXin Li return nil, err 66*ba677afaSXin Li } 67*ba677afaSXin Li if doc == nil { 68*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Document") 69*ba677afaSXin Li } 70*ba677afaSXin Li if doc.Packages == nil { 71*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Packages map") 72*ba677afaSXin Li } 73*ba677afaSXin Li if len(doc.Packages) != 1 { 74*ba677afaSXin Li return nil, fmt.Errorf("builder returned %d Packages", len(doc.Packages)) 75*ba677afaSXin Li } 76*ba677afaSXin Li 77*ba677afaSXin Li // now, walk through each file and find its licenses (if any) 78*ba677afaSXin Li pkg := doc.Packages[0] 79*ba677afaSXin Li if pkg == nil { 80*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Package") 81*ba677afaSXin Li } 82*ba677afaSXin Li if pkg.Files == nil { 83*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Files in Package") 84*ba677afaSXin Li } 85*ba677afaSXin Li licsForPackage := map[string]int{} 86*ba677afaSXin Li for _, f := range pkg.Files { 87*ba677afaSXin Li // start by initializing / clearing values 88*ba677afaSXin Li f.LicenseInfoInFiles = []string{"NOASSERTION"} 89*ba677afaSXin Li f.LicenseConcluded = "NOASSERTION" 90*ba677afaSXin Li 91*ba677afaSXin Li // check whether the searcher should ignore this file 92*ba677afaSXin Li if utils.ShouldIgnore(f.FileName, idconfig.SearcherPathsIgnored) { 93*ba677afaSXin Li continue 94*ba677afaSXin Li } 95*ba677afaSXin Li 96*ba677afaSXin Li fPath := filepath.Join(dirRoot, f.FileName) 97*ba677afaSXin Li // FIXME this is not preferable -- ignoring error 98*ba677afaSXin Li ids, _ := searchFileIDs(fPath) 99*ba677afaSXin Li // FIXME for now, proceed onwards with whatever IDs we obtained. 100*ba677afaSXin Li // FIXME instead of ignoring the error, should probably either log it, 101*ba677afaSXin Li // FIXME and/or enable the caller to configure what should happen. 102*ba677afaSXin Li 103*ba677afaSXin Li // separate out for this file's licenses 104*ba677afaSXin Li licsForFile := map[string]int{} 105*ba677afaSXin Li licsParens := []string{} 106*ba677afaSXin Li for _, lid := range ids { 107*ba677afaSXin Li // get individual elements and add for file and package 108*ba677afaSXin Li licElements := getIndividualLicenses(lid) 109*ba677afaSXin Li for _, elt := range licElements { 110*ba677afaSXin Li licsForFile[elt] = 1 111*ba677afaSXin Li licsForPackage[elt] = 1 112*ba677afaSXin Li } 113*ba677afaSXin Li // parenthesize if needed and add to slice for joining 114*ba677afaSXin Li licsParens = append(licsParens, makeElement(lid)) 115*ba677afaSXin Li } 116*ba677afaSXin Li 117*ba677afaSXin Li // OK -- now we can fill in the file's details, or NOASSERTION if none 118*ba677afaSXin Li if len(licsForFile) > 0 { 119*ba677afaSXin Li f.LicenseInfoInFiles = []string{} 120*ba677afaSXin Li for lic := range licsForFile { 121*ba677afaSXin Li f.LicenseInfoInFiles = append(f.LicenseInfoInFiles, lic) 122*ba677afaSXin Li } 123*ba677afaSXin Li sort.Strings(f.LicenseInfoInFiles) 124*ba677afaSXin Li // avoid adding parens and joining for single-ID items 125*ba677afaSXin Li if len(licsParens) == 1 { 126*ba677afaSXin Li f.LicenseConcluded = ids[0] 127*ba677afaSXin Li } else { 128*ba677afaSXin Li f.LicenseConcluded = strings.Join(licsParens, " AND ") 129*ba677afaSXin Li } 130*ba677afaSXin Li } 131*ba677afaSXin Li } 132*ba677afaSXin Li 133*ba677afaSXin Li // and finally, we can fill in the package's details 134*ba677afaSXin Li if len(licsForPackage) == 0 { 135*ba677afaSXin Li pkg.PackageLicenseInfoFromFiles = []string{"NOASSERTION"} 136*ba677afaSXin Li } else { 137*ba677afaSXin Li pkg.PackageLicenseInfoFromFiles = []string{} 138*ba677afaSXin Li for lic := range licsForPackage { 139*ba677afaSXin Li pkg.PackageLicenseInfoFromFiles = append(pkg.PackageLicenseInfoFromFiles, lic) 140*ba677afaSXin Li } 141*ba677afaSXin Li sort.Strings(pkg.PackageLicenseInfoFromFiles) 142*ba677afaSXin Li } 143*ba677afaSXin Li 144*ba677afaSXin Li return doc, nil 145*ba677afaSXin Li} 146*ba677afaSXin Li 147*ba677afaSXin Li// ===== 2.2 Searcher functions ===== 148*ba677afaSXin Li 149*ba677afaSXin Li// Config2_2 is a collection of configuration settings for docbuilder 150*ba677afaSXin Li// (for version 2.2 SPDX Documents). A few mandatory fields are set here 151*ba677afaSXin Li// so that they can be repeatedly reused in multiple calls to Build2_2. 152*ba677afaSXin Litype Config2_2 struct { 153*ba677afaSXin Li // NamespacePrefix should be a URI representing a prefix for the 154*ba677afaSXin Li // namespace with which the SPDX Document will be associated. 155*ba677afaSXin Li // It will be used in the DocumentNamespace field in the CreationInfo 156*ba677afaSXin Li // section, followed by the per-Document package name and a random UUID. 157*ba677afaSXin Li NamespacePrefix string 158*ba677afaSXin Li 159*ba677afaSXin Li // BuilderPathsIgnored lists certain paths to be omitted from the built 160*ba677afaSXin Li // document. Each string should be a path, relative to the package's 161*ba677afaSXin Li // dirRoot, to a specific file or (for all files in a directory) ending 162*ba677afaSXin Li // in a slash. Prefix the string with "**" to omit all instances of that 163*ba677afaSXin Li // file / directory, regardless of where it is in the file tree. 164*ba677afaSXin Li BuilderPathsIgnored []string 165*ba677afaSXin Li 166*ba677afaSXin Li // SearcherPathsIgnored lists certain paths that should not be searched 167*ba677afaSXin Li // by idsearcher, even if those paths have Files present. It uses the 168*ba677afaSXin Li // same format as BuilderPathsIgnored. 169*ba677afaSXin Li SearcherPathsIgnored []string 170*ba677afaSXin Li} 171*ba677afaSXin Li 172*ba677afaSXin Li// BuildIDsDocument2_2 creates an SPDX Document (version 2.2) and searches for 173*ba677afaSXin Li// short-form IDs in each file, filling in license fields as appropriate. It 174*ba677afaSXin Li// returns that document or error if any is encountered. Arguments: 175*ba677afaSXin Li// - packageName: name of package / directory 176*ba677afaSXin Li// - dirRoot: path to directory to be analyzed 177*ba677afaSXin Li// - namespacePrefix: URI representing a prefix for the 178*ba677afaSXin Li// namespace with which the SPDX Document will be associated 179*ba677afaSXin Lifunc BuildIDsDocument2_2(packageName string, dirRoot string, idconfig *Config2_2) (*v2_2.Document, error) { 180*ba677afaSXin Li // first, build the Document using builder 181*ba677afaSXin Li bconfig := &builder.Config2_2{ 182*ba677afaSXin Li NamespacePrefix: idconfig.NamespacePrefix, 183*ba677afaSXin Li CreatorType: "Tool", 184*ba677afaSXin Li Creator: "github.com/spdx/tools-golang/idsearcher", 185*ba677afaSXin Li PathsIgnored: idconfig.BuilderPathsIgnored, 186*ba677afaSXin Li } 187*ba677afaSXin Li doc, err := builder.Build2_2(packageName, dirRoot, bconfig) 188*ba677afaSXin Li if err != nil { 189*ba677afaSXin Li return nil, err 190*ba677afaSXin Li } 191*ba677afaSXin Li if doc == nil { 192*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Document") 193*ba677afaSXin Li } 194*ba677afaSXin Li if doc.Packages == nil { 195*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Packages map") 196*ba677afaSXin Li } 197*ba677afaSXin Li if len(doc.Packages) != 1 { 198*ba677afaSXin Li return nil, fmt.Errorf("builder returned %d Packages", len(doc.Packages)) 199*ba677afaSXin Li } 200*ba677afaSXin Li 201*ba677afaSXin Li // now, walk through each file and find its licenses (if any) 202*ba677afaSXin Li pkg := doc.Packages[0] 203*ba677afaSXin Li if pkg == nil { 204*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Package") 205*ba677afaSXin Li } 206*ba677afaSXin Li if pkg.Files == nil { 207*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Files in Package") 208*ba677afaSXin Li } 209*ba677afaSXin Li licsForPackage := map[string]int{} 210*ba677afaSXin Li for _, f := range pkg.Files { 211*ba677afaSXin Li // start by initializing / clearing values 212*ba677afaSXin Li f.LicenseInfoInFiles = []string{"NOASSERTION"} 213*ba677afaSXin Li f.LicenseConcluded = "NOASSERTION" 214*ba677afaSXin Li 215*ba677afaSXin Li // check whether the searcher should ignore this file 216*ba677afaSXin Li if utils.ShouldIgnore(f.FileName, idconfig.SearcherPathsIgnored) { 217*ba677afaSXin Li continue 218*ba677afaSXin Li } 219*ba677afaSXin Li 220*ba677afaSXin Li fPath := filepath.Join(dirRoot, f.FileName) 221*ba677afaSXin Li // FIXME this is not preferable -- ignoring error 222*ba677afaSXin Li ids, _ := searchFileIDs(fPath) 223*ba677afaSXin Li // FIXME for now, proceed onwards with whatever IDs we obtained. 224*ba677afaSXin Li // FIXME instead of ignoring the error, should probably either log it, 225*ba677afaSXin Li // FIXME and/or enable the caller to configure what should happen. 226*ba677afaSXin Li 227*ba677afaSXin Li // separate out for this file's licenses 228*ba677afaSXin Li licsForFile := map[string]int{} 229*ba677afaSXin Li licsParens := []string{} 230*ba677afaSXin Li for _, lid := range ids { 231*ba677afaSXin Li // get individual elements and add for file and package 232*ba677afaSXin Li licElements := getIndividualLicenses(lid) 233*ba677afaSXin Li for _, elt := range licElements { 234*ba677afaSXin Li licsForFile[elt] = 1 235*ba677afaSXin Li licsForPackage[elt] = 1 236*ba677afaSXin Li } 237*ba677afaSXin Li // parenthesize if needed and add to slice for joining 238*ba677afaSXin Li licsParens = append(licsParens, makeElement(lid)) 239*ba677afaSXin Li } 240*ba677afaSXin Li 241*ba677afaSXin Li // OK -- now we can fill in the file's details, or NOASSERTION if none 242*ba677afaSXin Li if len(licsForFile) > 0 { 243*ba677afaSXin Li f.LicenseInfoInFiles = []string{} 244*ba677afaSXin Li for lic := range licsForFile { 245*ba677afaSXin Li f.LicenseInfoInFiles = append(f.LicenseInfoInFiles, lic) 246*ba677afaSXin Li } 247*ba677afaSXin Li sort.Strings(f.LicenseInfoInFiles) 248*ba677afaSXin Li // avoid adding parens and joining for single-ID items 249*ba677afaSXin Li if len(licsParens) == 1 { 250*ba677afaSXin Li f.LicenseConcluded = ids[0] 251*ba677afaSXin Li } else { 252*ba677afaSXin Li f.LicenseConcluded = strings.Join(licsParens, " AND ") 253*ba677afaSXin Li } 254*ba677afaSXin Li } 255*ba677afaSXin Li } 256*ba677afaSXin Li 257*ba677afaSXin Li // and finally, we can fill in the package's details 258*ba677afaSXin Li if len(licsForPackage) == 0 { 259*ba677afaSXin Li pkg.PackageLicenseInfoFromFiles = []string{"NOASSERTION"} 260*ba677afaSXin Li } else { 261*ba677afaSXin Li pkg.PackageLicenseInfoFromFiles = []string{} 262*ba677afaSXin Li for lic := range licsForPackage { 263*ba677afaSXin Li pkg.PackageLicenseInfoFromFiles = append(pkg.PackageLicenseInfoFromFiles, lic) 264*ba677afaSXin Li } 265*ba677afaSXin Li sort.Strings(pkg.PackageLicenseInfoFromFiles) 266*ba677afaSXin Li } 267*ba677afaSXin Li 268*ba677afaSXin Li return doc, nil 269*ba677afaSXin Li} 270*ba677afaSXin Li 271*ba677afaSXin Li// ===== 2.3 Searcher functions ===== 272*ba677afaSXin Li 273*ba677afaSXin Li// Config2_3 is a collection of configuration settings for docbuilder 274*ba677afaSXin Li// (for version 2.3 SPDX Documents). A few mandatory fields are set here 275*ba677afaSXin Li// so that they can be repeatedly reused in multiple calls to Build2_3. 276*ba677afaSXin Litype Config2_3 struct { 277*ba677afaSXin Li // NamespacePrefix should be a URI representing a prefix for the 278*ba677afaSXin Li // namespace with which the SPDX Document will be associated. 279*ba677afaSXin Li // It will be used in the DocumentNamespace field in the CreationInfo 280*ba677afaSXin Li // section, followed by the per-Document package name and a random UUID. 281*ba677afaSXin Li NamespacePrefix string 282*ba677afaSXin Li 283*ba677afaSXin Li // BuilderPathsIgnored lists certain paths to be omitted from the built 284*ba677afaSXin Li // document. Each string should be a path, relative to the package's 285*ba677afaSXin Li // dirRoot, to a specific file or (for all files in a directory) ending 286*ba677afaSXin Li // in a slash. Prefix the string with "**" to omit all instances of that 287*ba677afaSXin Li // file / directory, regardless of where it is in the file tree. 288*ba677afaSXin Li BuilderPathsIgnored []string 289*ba677afaSXin Li 290*ba677afaSXin Li // SearcherPathsIgnored lists certain paths that should not be searched 291*ba677afaSXin Li // by idsearcher, even if those paths have Files present. It uses the 292*ba677afaSXin Li // same format as BuilderPathsIgnored. 293*ba677afaSXin Li SearcherPathsIgnored []string 294*ba677afaSXin Li} 295*ba677afaSXin Li 296*ba677afaSXin Li// BuildIDsDocument2_3 creates an SPDX Document (version 2.3) and searches for 297*ba677afaSXin Li// short-form IDs in each file, filling in license fields as appropriate. It 298*ba677afaSXin Li// returns that document or error if any is encountered. Arguments: 299*ba677afaSXin Li// - packageName: name of package / directory 300*ba677afaSXin Li// - dirRoot: path to directory to be analyzed 301*ba677afaSXin Li// - namespacePrefix: URI representing a prefix for the 302*ba677afaSXin Li// namespace with which the SPDX Document will be associated 303*ba677afaSXin Lifunc BuildIDsDocument2_3(packageName string, dirRoot string, idconfig *Config2_3) (*v2_3.Document, error) { 304*ba677afaSXin Li // first, build the Document using builder 305*ba677afaSXin Li bconfig := &builder.Config2_3{ 306*ba677afaSXin Li NamespacePrefix: idconfig.NamespacePrefix, 307*ba677afaSXin Li CreatorType: "Tool", 308*ba677afaSXin Li Creator: "github.com/spdx/tools-golang/idsearcher", 309*ba677afaSXin Li PathsIgnored: idconfig.BuilderPathsIgnored, 310*ba677afaSXin Li } 311*ba677afaSXin Li doc, err := builder.Build2_3(packageName, dirRoot, bconfig) 312*ba677afaSXin Li if err != nil { 313*ba677afaSXin Li return nil, err 314*ba677afaSXin Li } 315*ba677afaSXin Li if doc == nil { 316*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Document") 317*ba677afaSXin Li } 318*ba677afaSXin Li if doc.Packages == nil { 319*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Packages map") 320*ba677afaSXin Li } 321*ba677afaSXin Li if len(doc.Packages) != 1 { 322*ba677afaSXin Li return nil, fmt.Errorf("builder returned %d Packages", len(doc.Packages)) 323*ba677afaSXin Li } 324*ba677afaSXin Li 325*ba677afaSXin Li // now, walk through each file and find its licenses (if any) 326*ba677afaSXin Li pkg := doc.Packages[0] 327*ba677afaSXin Li if pkg == nil { 328*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Package") 329*ba677afaSXin Li } 330*ba677afaSXin Li if pkg.Files == nil { 331*ba677afaSXin Li return nil, fmt.Errorf("builder returned nil Files in Package") 332*ba677afaSXin Li } 333*ba677afaSXin Li licsForPackage := map[string]int{} 334*ba677afaSXin Li for _, f := range pkg.Files { 335*ba677afaSXin Li // start by initializing / clearing values 336*ba677afaSXin Li f.LicenseInfoInFiles = []string{"NOASSERTION"} 337*ba677afaSXin Li f.LicenseConcluded = "NOASSERTION" 338*ba677afaSXin Li 339*ba677afaSXin Li // check whether the searcher should ignore this file 340*ba677afaSXin Li if utils.ShouldIgnore(f.FileName, idconfig.SearcherPathsIgnored) { 341*ba677afaSXin Li continue 342*ba677afaSXin Li } 343*ba677afaSXin Li 344*ba677afaSXin Li fPath := filepath.Join(dirRoot, f.FileName) 345*ba677afaSXin Li // FIXME this is not preferable -- ignoring error 346*ba677afaSXin Li ids, _ := searchFileIDs(fPath) 347*ba677afaSXin Li // FIXME for now, proceed onwards with whatever IDs we obtained. 348*ba677afaSXin Li // FIXME instead of ignoring the error, should probably either log it, 349*ba677afaSXin Li // FIXME and/or enable the caller to configure what should happen. 350*ba677afaSXin Li 351*ba677afaSXin Li // separate out for this file's licenses 352*ba677afaSXin Li licsForFile := map[string]int{} 353*ba677afaSXin Li licsParens := []string{} 354*ba677afaSXin Li for _, lid := range ids { 355*ba677afaSXin Li // get individual elements and add for file and package 356*ba677afaSXin Li licElements := getIndividualLicenses(lid) 357*ba677afaSXin Li for _, elt := range licElements { 358*ba677afaSXin Li licsForFile[elt] = 1 359*ba677afaSXin Li licsForPackage[elt] = 1 360*ba677afaSXin Li } 361*ba677afaSXin Li // parenthesize if needed and add to slice for joining 362*ba677afaSXin Li licsParens = append(licsParens, makeElement(lid)) 363*ba677afaSXin Li } 364*ba677afaSXin Li 365*ba677afaSXin Li // OK -- now we can fill in the file's details, or NOASSERTION if none 366*ba677afaSXin Li if len(licsForFile) > 0 { 367*ba677afaSXin Li f.LicenseInfoInFiles = []string{} 368*ba677afaSXin Li for lic := range licsForFile { 369*ba677afaSXin Li f.LicenseInfoInFiles = append(f.LicenseInfoInFiles, lic) 370*ba677afaSXin Li } 371*ba677afaSXin Li sort.Strings(f.LicenseInfoInFiles) 372*ba677afaSXin Li // avoid adding parens and joining for single-ID items 373*ba677afaSXin Li if len(licsParens) == 1 { 374*ba677afaSXin Li f.LicenseConcluded = ids[0] 375*ba677afaSXin Li } else { 376*ba677afaSXin Li f.LicenseConcluded = strings.Join(licsParens, " AND ") 377*ba677afaSXin Li } 378*ba677afaSXin Li } 379*ba677afaSXin Li } 380*ba677afaSXin Li 381*ba677afaSXin Li // and finally, we can fill in the package's details 382*ba677afaSXin Li if len(licsForPackage) == 0 { 383*ba677afaSXin Li pkg.PackageLicenseInfoFromFiles = []string{"NOASSERTION"} 384*ba677afaSXin Li } else { 385*ba677afaSXin Li pkg.PackageLicenseInfoFromFiles = []string{} 386*ba677afaSXin Li for lic := range licsForPackage { 387*ba677afaSXin Li pkg.PackageLicenseInfoFromFiles = append(pkg.PackageLicenseInfoFromFiles, lic) 388*ba677afaSXin Li } 389*ba677afaSXin Li sort.Strings(pkg.PackageLicenseInfoFromFiles) 390*ba677afaSXin Li } 391*ba677afaSXin Li 392*ba677afaSXin Li return doc, nil 393*ba677afaSXin Li} 394*ba677afaSXin Li 395*ba677afaSXin Li// ===== Utility functions (not version-specific) ===== 396*ba677afaSXin Lifunc searchFileIDs(filePath string) ([]string, error) { 397*ba677afaSXin Li idsMap := map[string]int{} 398*ba677afaSXin Li ids := []string{} 399*ba677afaSXin Li 400*ba677afaSXin Li f, err := os.Open(filePath) 401*ba677afaSXin Li if err != nil { 402*ba677afaSXin Li return nil, err 403*ba677afaSXin Li } 404*ba677afaSXin Li defer f.Close() 405*ba677afaSXin Li 406*ba677afaSXin Li scanner := bufio.NewScanner(f) 407*ba677afaSXin Li 408*ba677afaSXin Li for scanner.Scan() { 409*ba677afaSXin Li if strings.Contains(scanner.Text(), "SPDX-License-Identifier:") { 410*ba677afaSXin Li strs := strings.SplitN(scanner.Text(), "SPDX-License-Identifier:", 2) 411*ba677afaSXin Li 412*ba677afaSXin Li // if prefixed by more than n characters, it's probably not a 413*ba677afaSXin Li // short-form ID; it's probably code to detect short-form IDs. 414*ba677afaSXin Li // Like this function itself, for example =) 415*ba677afaSXin Li prefix := stripTrash(strs[0]) 416*ba677afaSXin Li if len(prefix) > 5 { 417*ba677afaSXin Li continue 418*ba677afaSXin Li } 419*ba677afaSXin Li 420*ba677afaSXin Li // stop before trailing */ if it is present 421*ba677afaSXin Li lidToExtract := strs[1] 422*ba677afaSXin Li lidToExtract = strings.Split(lidToExtract, "*/")[0] 423*ba677afaSXin Li lid := strings.TrimSpace(lidToExtract) 424*ba677afaSXin Li lid = stripTrash(lid) 425*ba677afaSXin Li idsMap[lid] = 1 426*ba677afaSXin Li } 427*ba677afaSXin Li } 428*ba677afaSXin Li 429*ba677afaSXin Li // FIXME for now, ignore scanner errors because we want to return whatever 430*ba677afaSXin Li // FIXME IDs were in fact found. should probably be changed to either 431*ba677afaSXin Li // FIXME log the error, and/or be configurable for what should happen. 432*ba677afaSXin Li // if err = scanner.Err(); err != nil { 433*ba677afaSXin Li // return nil, err 434*ba677afaSXin Li // } 435*ba677afaSXin Li 436*ba677afaSXin Li // now, convert map to string 437*ba677afaSXin Li for lid := range idsMap { 438*ba677afaSXin Li ids = append(ids, lid) 439*ba677afaSXin Li } 440*ba677afaSXin Li 441*ba677afaSXin Li // and sort it 442*ba677afaSXin Li sort.Strings(ids) 443*ba677afaSXin Li 444*ba677afaSXin Li return ids, nil 445*ba677afaSXin Li} 446*ba677afaSXin Li 447*ba677afaSXin Lifunc stripTrash(lid string) string { 448*ba677afaSXin Li re := regexp.MustCompile(`[^\w\s\d.\-\+()]+`) 449*ba677afaSXin Li return re.ReplaceAllString(lid, "") 450*ba677afaSXin Li} 451*ba677afaSXin Li 452*ba677afaSXin Lifunc makeElement(lic string) string { 453*ba677afaSXin Li if strings.Contains(lic, " AND ") || strings.Contains(lic, " OR ") { 454*ba677afaSXin Li return fmt.Sprintf("(%s)", lic) 455*ba677afaSXin Li } 456*ba677afaSXin Li 457*ba677afaSXin Li return lic 458*ba677afaSXin Li} 459*ba677afaSXin Li 460*ba677afaSXin Lifunc getIndividualLicenses(lic string) []string { 461*ba677afaSXin Li // replace parens and '+' with spaces 462*ba677afaSXin Li lic = strings.Replace(lic, "(", " ", -1) 463*ba677afaSXin Li lic = strings.Replace(lic, ")", " ", -1) 464*ba677afaSXin Li lic = strings.Replace(lic, "+", " ", -1) 465*ba677afaSXin Li 466*ba677afaSXin Li // now, split by spaces, trim, and add to slice 467*ba677afaSXin Li licElements := strings.Split(lic, " ") 468*ba677afaSXin Li lics := []string{} 469*ba677afaSXin Li for _, elt := range licElements { 470*ba677afaSXin Li elt := strings.TrimSpace(elt) 471*ba677afaSXin Li // don't add if empty or if case-insensitive operator 472*ba677afaSXin Li if elt == "" || strings.EqualFold(elt, "AND") || 473*ba677afaSXin Li strings.EqualFold(elt, "OR") || strings.EqualFold(elt, "WITH") { 474*ba677afaSXin Li continue 475*ba677afaSXin Li } 476*ba677afaSXin Li 477*ba677afaSXin Li lics = append(lics, elt) 478*ba677afaSXin Li } 479*ba677afaSXin Li 480*ba677afaSXin Li // sort before returning 481*ba677afaSXin Li sort.Strings(lics) 482*ba677afaSXin Li return lics 483*ba677afaSXin Li} 484