// xref: /aosp_15_r20/external/spdx-tools/idsearcher/idsearcher.go (revision ba677afa8f67bb56cbc794f4d0e378e0da058e16)

// Package idsearcher is used to search for short-form IDs in files
// within a directory, and to build an SPDX Document containing those
// license findings.
// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
package idsearcher
6*ba677afaSXin Li
import (
	"bufio"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/spdx/tools-golang/builder"
	"github.com/spdx/tools-golang/spdx/v2_1"
	"github.com/spdx/tools-golang/spdx/v2_2"
	"github.com/spdx/tools-golang/spdx/v2_3"
	"github.com/spdx/tools-golang/utils"
)
22*ba677afaSXin Li
// ===== 2.1 Searcher functions =====
24*ba677afaSXin Li
// Config2_1 is a collection of configuration settings for idsearcher
// (for version 2.1 SPDX Documents). A few mandatory fields are set here
// so that they can be repeatedly reused in multiple calls to
// BuildIDsDocument2_1.
type Config2_1 struct {
	// NamespacePrefix should be a URI representing a prefix for the
	// namespace with which the SPDX Document will be associated.
	// It will be used in the DocumentNamespace field in the CreationInfo
	// section, followed by the per-Document package name and a random UUID.
	NamespacePrefix string

	// BuilderPathsIgnored lists certain paths to be omitted from the built
	// document. Each string should be a path, relative to the package's
	// dirRoot, to a specific file or (for all files in a directory) ending
	// in a slash. Prefix the string with "**" to omit all instances of that
	// file / directory, regardless of where it is in the file tree.
	BuilderPathsIgnored []string

	// SearcherPathsIgnored lists certain paths that should not be searched
	// by idsearcher, even if those paths have Files present. It uses the
	// same format as BuilderPathsIgnored. Files skipped this way stay in
	// the document with NOASSERTION license fields.
	SearcherPathsIgnored []string
}
47*ba677afaSXin Li
48*ba677afaSXin Li// BuildIDsDocument2_1 creates an SPDX Document (version 2.1) and searches for
49*ba677afaSXin Li// short-form IDs in each file, filling in license fields as appropriate. It
50*ba677afaSXin Li// returns that document or error if any is encountered. Arguments:
51*ba677afaSXin Li//   - packageName: name of package / directory
52*ba677afaSXin Li//   - dirRoot: path to directory to be analyzed
53*ba677afaSXin Li//   - namespacePrefix: URI representing a prefix for the
54*ba677afaSXin Li//     namespace with which the SPDX Document will be associated
55*ba677afaSXin Lifunc BuildIDsDocument2_1(packageName string, dirRoot string, idconfig *Config2_1) (*v2_1.Document, error) {
56*ba677afaSXin Li	// first, build the Document using builder
57*ba677afaSXin Li	bconfig := &builder.Config2_1{
58*ba677afaSXin Li		NamespacePrefix: idconfig.NamespacePrefix,
59*ba677afaSXin Li		CreatorType:     "Tool",
60*ba677afaSXin Li		Creator:         "github.com/spdx/tools-golang/idsearcher",
61*ba677afaSXin Li		PathsIgnored:    idconfig.BuilderPathsIgnored,
62*ba677afaSXin Li	}
63*ba677afaSXin Li	doc, err := builder.Build2_1(packageName, dirRoot, bconfig)
64*ba677afaSXin Li	if err != nil {
65*ba677afaSXin Li		return nil, err
66*ba677afaSXin Li	}
67*ba677afaSXin Li	if doc == nil {
68*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Document")
69*ba677afaSXin Li	}
70*ba677afaSXin Li	if doc.Packages == nil {
71*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Packages map")
72*ba677afaSXin Li	}
73*ba677afaSXin Li	if len(doc.Packages) != 1 {
74*ba677afaSXin Li		return nil, fmt.Errorf("builder returned %d Packages", len(doc.Packages))
75*ba677afaSXin Li	}
76*ba677afaSXin Li
77*ba677afaSXin Li	// now, walk through each file and find its licenses (if any)
78*ba677afaSXin Li	pkg := doc.Packages[0]
79*ba677afaSXin Li	if pkg == nil {
80*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Package")
81*ba677afaSXin Li	}
82*ba677afaSXin Li	if pkg.Files == nil {
83*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Files in Package")
84*ba677afaSXin Li	}
85*ba677afaSXin Li	licsForPackage := map[string]int{}
86*ba677afaSXin Li	for _, f := range pkg.Files {
87*ba677afaSXin Li		// start by initializing / clearing values
88*ba677afaSXin Li		f.LicenseInfoInFiles = []string{"NOASSERTION"}
89*ba677afaSXin Li		f.LicenseConcluded = "NOASSERTION"
90*ba677afaSXin Li
91*ba677afaSXin Li		// check whether the searcher should ignore this file
92*ba677afaSXin Li		if utils.ShouldIgnore(f.FileName, idconfig.SearcherPathsIgnored) {
93*ba677afaSXin Li			continue
94*ba677afaSXin Li		}
95*ba677afaSXin Li
96*ba677afaSXin Li		fPath := filepath.Join(dirRoot, f.FileName)
97*ba677afaSXin Li		// FIXME this is not preferable -- ignoring error
98*ba677afaSXin Li		ids, _ := searchFileIDs(fPath)
99*ba677afaSXin Li		// FIXME for now, proceed onwards with whatever IDs we obtained.
100*ba677afaSXin Li		// FIXME instead of ignoring the error, should probably either log it,
101*ba677afaSXin Li		// FIXME and/or enable the caller to configure what should happen.
102*ba677afaSXin Li
103*ba677afaSXin Li		// separate out for this file's licenses
104*ba677afaSXin Li		licsForFile := map[string]int{}
105*ba677afaSXin Li		licsParens := []string{}
106*ba677afaSXin Li		for _, lid := range ids {
107*ba677afaSXin Li			// get individual elements and add for file and package
108*ba677afaSXin Li			licElements := getIndividualLicenses(lid)
109*ba677afaSXin Li			for _, elt := range licElements {
110*ba677afaSXin Li				licsForFile[elt] = 1
111*ba677afaSXin Li				licsForPackage[elt] = 1
112*ba677afaSXin Li			}
113*ba677afaSXin Li			// parenthesize if needed and add to slice for joining
114*ba677afaSXin Li			licsParens = append(licsParens, makeElement(lid))
115*ba677afaSXin Li		}
116*ba677afaSXin Li
117*ba677afaSXin Li		// OK -- now we can fill in the file's details, or NOASSERTION if none
118*ba677afaSXin Li		if len(licsForFile) > 0 {
119*ba677afaSXin Li			f.LicenseInfoInFiles = []string{}
120*ba677afaSXin Li			for lic := range licsForFile {
121*ba677afaSXin Li				f.LicenseInfoInFiles = append(f.LicenseInfoInFiles, lic)
122*ba677afaSXin Li			}
123*ba677afaSXin Li			sort.Strings(f.LicenseInfoInFiles)
124*ba677afaSXin Li			// avoid adding parens and joining for single-ID items
125*ba677afaSXin Li			if len(licsParens) == 1 {
126*ba677afaSXin Li				f.LicenseConcluded = ids[0]
127*ba677afaSXin Li			} else {
128*ba677afaSXin Li				f.LicenseConcluded = strings.Join(licsParens, " AND ")
129*ba677afaSXin Li			}
130*ba677afaSXin Li		}
131*ba677afaSXin Li	}
132*ba677afaSXin Li
133*ba677afaSXin Li	// and finally, we can fill in the package's details
134*ba677afaSXin Li	if len(licsForPackage) == 0 {
135*ba677afaSXin Li		pkg.PackageLicenseInfoFromFiles = []string{"NOASSERTION"}
136*ba677afaSXin Li	} else {
137*ba677afaSXin Li		pkg.PackageLicenseInfoFromFiles = []string{}
138*ba677afaSXin Li		for lic := range licsForPackage {
139*ba677afaSXin Li			pkg.PackageLicenseInfoFromFiles = append(pkg.PackageLicenseInfoFromFiles, lic)
140*ba677afaSXin Li		}
141*ba677afaSXin Li		sort.Strings(pkg.PackageLicenseInfoFromFiles)
142*ba677afaSXin Li	}
143*ba677afaSXin Li
144*ba677afaSXin Li	return doc, nil
145*ba677afaSXin Li}
146*ba677afaSXin Li
// ===== 2.2 Searcher functions =====
148*ba677afaSXin Li
// Config2_2 is a collection of configuration settings for idsearcher
// (for version 2.2 SPDX Documents). A few mandatory fields are set here
// so that they can be repeatedly reused in multiple calls to
// BuildIDsDocument2_2.
type Config2_2 struct {
	// NamespacePrefix should be a URI representing a prefix for the
	// namespace with which the SPDX Document will be associated.
	// It will be used in the DocumentNamespace field in the CreationInfo
	// section, followed by the per-Document package name and a random UUID.
	NamespacePrefix string

	// BuilderPathsIgnored lists certain paths to be omitted from the built
	// document. Each string should be a path, relative to the package's
	// dirRoot, to a specific file or (for all files in a directory) ending
	// in a slash. Prefix the string with "**" to omit all instances of that
	// file / directory, regardless of where it is in the file tree.
	BuilderPathsIgnored []string

	// SearcherPathsIgnored lists certain paths that should not be searched
	// by idsearcher, even if those paths have Files present. It uses the
	// same format as BuilderPathsIgnored. Files skipped this way stay in
	// the document with NOASSERTION license fields.
	SearcherPathsIgnored []string
}
171*ba677afaSXin Li
172*ba677afaSXin Li// BuildIDsDocument2_2 creates an SPDX Document (version 2.2) and searches for
173*ba677afaSXin Li// short-form IDs in each file, filling in license fields as appropriate. It
174*ba677afaSXin Li// returns that document or error if any is encountered. Arguments:
175*ba677afaSXin Li//   - packageName: name of package / directory
176*ba677afaSXin Li//   - dirRoot: path to directory to be analyzed
177*ba677afaSXin Li//   - namespacePrefix: URI representing a prefix for the
178*ba677afaSXin Li//     namespace with which the SPDX Document will be associated
179*ba677afaSXin Lifunc BuildIDsDocument2_2(packageName string, dirRoot string, idconfig *Config2_2) (*v2_2.Document, error) {
180*ba677afaSXin Li	// first, build the Document using builder
181*ba677afaSXin Li	bconfig := &builder.Config2_2{
182*ba677afaSXin Li		NamespacePrefix: idconfig.NamespacePrefix,
183*ba677afaSXin Li		CreatorType:     "Tool",
184*ba677afaSXin Li		Creator:         "github.com/spdx/tools-golang/idsearcher",
185*ba677afaSXin Li		PathsIgnored:    idconfig.BuilderPathsIgnored,
186*ba677afaSXin Li	}
187*ba677afaSXin Li	doc, err := builder.Build2_2(packageName, dirRoot, bconfig)
188*ba677afaSXin Li	if err != nil {
189*ba677afaSXin Li		return nil, err
190*ba677afaSXin Li	}
191*ba677afaSXin Li	if doc == nil {
192*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Document")
193*ba677afaSXin Li	}
194*ba677afaSXin Li	if doc.Packages == nil {
195*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Packages map")
196*ba677afaSXin Li	}
197*ba677afaSXin Li	if len(doc.Packages) != 1 {
198*ba677afaSXin Li		return nil, fmt.Errorf("builder returned %d Packages", len(doc.Packages))
199*ba677afaSXin Li	}
200*ba677afaSXin Li
201*ba677afaSXin Li	// now, walk through each file and find its licenses (if any)
202*ba677afaSXin Li	pkg := doc.Packages[0]
203*ba677afaSXin Li	if pkg == nil {
204*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Package")
205*ba677afaSXin Li	}
206*ba677afaSXin Li	if pkg.Files == nil {
207*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Files in Package")
208*ba677afaSXin Li	}
209*ba677afaSXin Li	licsForPackage := map[string]int{}
210*ba677afaSXin Li	for _, f := range pkg.Files {
211*ba677afaSXin Li		// start by initializing / clearing values
212*ba677afaSXin Li		f.LicenseInfoInFiles = []string{"NOASSERTION"}
213*ba677afaSXin Li		f.LicenseConcluded = "NOASSERTION"
214*ba677afaSXin Li
215*ba677afaSXin Li		// check whether the searcher should ignore this file
216*ba677afaSXin Li		if utils.ShouldIgnore(f.FileName, idconfig.SearcherPathsIgnored) {
217*ba677afaSXin Li			continue
218*ba677afaSXin Li		}
219*ba677afaSXin Li
220*ba677afaSXin Li		fPath := filepath.Join(dirRoot, f.FileName)
221*ba677afaSXin Li		// FIXME this is not preferable -- ignoring error
222*ba677afaSXin Li		ids, _ := searchFileIDs(fPath)
223*ba677afaSXin Li		// FIXME for now, proceed onwards with whatever IDs we obtained.
224*ba677afaSXin Li		// FIXME instead of ignoring the error, should probably either log it,
225*ba677afaSXin Li		// FIXME and/or enable the caller to configure what should happen.
226*ba677afaSXin Li
227*ba677afaSXin Li		// separate out for this file's licenses
228*ba677afaSXin Li		licsForFile := map[string]int{}
229*ba677afaSXin Li		licsParens := []string{}
230*ba677afaSXin Li		for _, lid := range ids {
231*ba677afaSXin Li			// get individual elements and add for file and package
232*ba677afaSXin Li			licElements := getIndividualLicenses(lid)
233*ba677afaSXin Li			for _, elt := range licElements {
234*ba677afaSXin Li				licsForFile[elt] = 1
235*ba677afaSXin Li				licsForPackage[elt] = 1
236*ba677afaSXin Li			}
237*ba677afaSXin Li			// parenthesize if needed and add to slice for joining
238*ba677afaSXin Li			licsParens = append(licsParens, makeElement(lid))
239*ba677afaSXin Li		}
240*ba677afaSXin Li
241*ba677afaSXin Li		// OK -- now we can fill in the file's details, or NOASSERTION if none
242*ba677afaSXin Li		if len(licsForFile) > 0 {
243*ba677afaSXin Li			f.LicenseInfoInFiles = []string{}
244*ba677afaSXin Li			for lic := range licsForFile {
245*ba677afaSXin Li				f.LicenseInfoInFiles = append(f.LicenseInfoInFiles, lic)
246*ba677afaSXin Li			}
247*ba677afaSXin Li			sort.Strings(f.LicenseInfoInFiles)
248*ba677afaSXin Li			// avoid adding parens and joining for single-ID items
249*ba677afaSXin Li			if len(licsParens) == 1 {
250*ba677afaSXin Li				f.LicenseConcluded = ids[0]
251*ba677afaSXin Li			} else {
252*ba677afaSXin Li				f.LicenseConcluded = strings.Join(licsParens, " AND ")
253*ba677afaSXin Li			}
254*ba677afaSXin Li		}
255*ba677afaSXin Li	}
256*ba677afaSXin Li
257*ba677afaSXin Li	// and finally, we can fill in the package's details
258*ba677afaSXin Li	if len(licsForPackage) == 0 {
259*ba677afaSXin Li		pkg.PackageLicenseInfoFromFiles = []string{"NOASSERTION"}
260*ba677afaSXin Li	} else {
261*ba677afaSXin Li		pkg.PackageLicenseInfoFromFiles = []string{}
262*ba677afaSXin Li		for lic := range licsForPackage {
263*ba677afaSXin Li			pkg.PackageLicenseInfoFromFiles = append(pkg.PackageLicenseInfoFromFiles, lic)
264*ba677afaSXin Li		}
265*ba677afaSXin Li		sort.Strings(pkg.PackageLicenseInfoFromFiles)
266*ba677afaSXin Li	}
267*ba677afaSXin Li
268*ba677afaSXin Li	return doc, nil
269*ba677afaSXin Li}
270*ba677afaSXin Li
// ===== 2.3 Searcher functions =====
272*ba677afaSXin Li
// Config2_3 is a collection of configuration settings for idsearcher
// (for version 2.3 SPDX Documents). A few mandatory fields are set here
// so that they can be repeatedly reused in multiple calls to
// BuildIDsDocument2_3.
type Config2_3 struct {
	// NamespacePrefix should be a URI representing a prefix for the
	// namespace with which the SPDX Document will be associated.
	// It will be used in the DocumentNamespace field in the CreationInfo
	// section, followed by the per-Document package name and a random UUID.
	NamespacePrefix string

	// BuilderPathsIgnored lists certain paths to be omitted from the built
	// document. Each string should be a path, relative to the package's
	// dirRoot, to a specific file or (for all files in a directory) ending
	// in a slash. Prefix the string with "**" to omit all instances of that
	// file / directory, regardless of where it is in the file tree.
	BuilderPathsIgnored []string

	// SearcherPathsIgnored lists certain paths that should not be searched
	// by idsearcher, even if those paths have Files present. It uses the
	// same format as BuilderPathsIgnored. Files skipped this way stay in
	// the document with NOASSERTION license fields.
	SearcherPathsIgnored []string
}
295*ba677afaSXin Li
296*ba677afaSXin Li// BuildIDsDocument2_3 creates an SPDX Document (version 2.3) and searches for
297*ba677afaSXin Li// short-form IDs in each file, filling in license fields as appropriate. It
298*ba677afaSXin Li// returns that document or error if any is encountered. Arguments:
299*ba677afaSXin Li//   - packageName: name of package / directory
300*ba677afaSXin Li//   - dirRoot: path to directory to be analyzed
301*ba677afaSXin Li//   - namespacePrefix: URI representing a prefix for the
302*ba677afaSXin Li//     namespace with which the SPDX Document will be associated
303*ba677afaSXin Lifunc BuildIDsDocument2_3(packageName string, dirRoot string, idconfig *Config2_3) (*v2_3.Document, error) {
304*ba677afaSXin Li	// first, build the Document using builder
305*ba677afaSXin Li	bconfig := &builder.Config2_3{
306*ba677afaSXin Li		NamespacePrefix: idconfig.NamespacePrefix,
307*ba677afaSXin Li		CreatorType:     "Tool",
308*ba677afaSXin Li		Creator:         "github.com/spdx/tools-golang/idsearcher",
309*ba677afaSXin Li		PathsIgnored:    idconfig.BuilderPathsIgnored,
310*ba677afaSXin Li	}
311*ba677afaSXin Li	doc, err := builder.Build2_3(packageName, dirRoot, bconfig)
312*ba677afaSXin Li	if err != nil {
313*ba677afaSXin Li		return nil, err
314*ba677afaSXin Li	}
315*ba677afaSXin Li	if doc == nil {
316*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Document")
317*ba677afaSXin Li	}
318*ba677afaSXin Li	if doc.Packages == nil {
319*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Packages map")
320*ba677afaSXin Li	}
321*ba677afaSXin Li	if len(doc.Packages) != 1 {
322*ba677afaSXin Li		return nil, fmt.Errorf("builder returned %d Packages", len(doc.Packages))
323*ba677afaSXin Li	}
324*ba677afaSXin Li
325*ba677afaSXin Li	// now, walk through each file and find its licenses (if any)
326*ba677afaSXin Li	pkg := doc.Packages[0]
327*ba677afaSXin Li	if pkg == nil {
328*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Package")
329*ba677afaSXin Li	}
330*ba677afaSXin Li	if pkg.Files == nil {
331*ba677afaSXin Li		return nil, fmt.Errorf("builder returned nil Files in Package")
332*ba677afaSXin Li	}
333*ba677afaSXin Li	licsForPackage := map[string]int{}
334*ba677afaSXin Li	for _, f := range pkg.Files {
335*ba677afaSXin Li		// start by initializing / clearing values
336*ba677afaSXin Li		f.LicenseInfoInFiles = []string{"NOASSERTION"}
337*ba677afaSXin Li		f.LicenseConcluded = "NOASSERTION"
338*ba677afaSXin Li
339*ba677afaSXin Li		// check whether the searcher should ignore this file
340*ba677afaSXin Li		if utils.ShouldIgnore(f.FileName, idconfig.SearcherPathsIgnored) {
341*ba677afaSXin Li			continue
342*ba677afaSXin Li		}
343*ba677afaSXin Li
344*ba677afaSXin Li		fPath := filepath.Join(dirRoot, f.FileName)
345*ba677afaSXin Li		// FIXME this is not preferable -- ignoring error
346*ba677afaSXin Li		ids, _ := searchFileIDs(fPath)
347*ba677afaSXin Li		// FIXME for now, proceed onwards with whatever IDs we obtained.
348*ba677afaSXin Li		// FIXME instead of ignoring the error, should probably either log it,
349*ba677afaSXin Li		// FIXME and/or enable the caller to configure what should happen.
350*ba677afaSXin Li
351*ba677afaSXin Li		// separate out for this file's licenses
352*ba677afaSXin Li		licsForFile := map[string]int{}
353*ba677afaSXin Li		licsParens := []string{}
354*ba677afaSXin Li		for _, lid := range ids {
355*ba677afaSXin Li			// get individual elements and add for file and package
356*ba677afaSXin Li			licElements := getIndividualLicenses(lid)
357*ba677afaSXin Li			for _, elt := range licElements {
358*ba677afaSXin Li				licsForFile[elt] = 1
359*ba677afaSXin Li				licsForPackage[elt] = 1
360*ba677afaSXin Li			}
361*ba677afaSXin Li			// parenthesize if needed and add to slice for joining
362*ba677afaSXin Li			licsParens = append(licsParens, makeElement(lid))
363*ba677afaSXin Li		}
364*ba677afaSXin Li
365*ba677afaSXin Li		// OK -- now we can fill in the file's details, or NOASSERTION if none
366*ba677afaSXin Li		if len(licsForFile) > 0 {
367*ba677afaSXin Li			f.LicenseInfoInFiles = []string{}
368*ba677afaSXin Li			for lic := range licsForFile {
369*ba677afaSXin Li				f.LicenseInfoInFiles = append(f.LicenseInfoInFiles, lic)
370*ba677afaSXin Li			}
371*ba677afaSXin Li			sort.Strings(f.LicenseInfoInFiles)
372*ba677afaSXin Li			// avoid adding parens and joining for single-ID items
373*ba677afaSXin Li			if len(licsParens) == 1 {
374*ba677afaSXin Li				f.LicenseConcluded = ids[0]
375*ba677afaSXin Li			} else {
376*ba677afaSXin Li				f.LicenseConcluded = strings.Join(licsParens, " AND ")
377*ba677afaSXin Li			}
378*ba677afaSXin Li		}
379*ba677afaSXin Li	}
380*ba677afaSXin Li
381*ba677afaSXin Li	// and finally, we can fill in the package's details
382*ba677afaSXin Li	if len(licsForPackage) == 0 {
383*ba677afaSXin Li		pkg.PackageLicenseInfoFromFiles = []string{"NOASSERTION"}
384*ba677afaSXin Li	} else {
385*ba677afaSXin Li		pkg.PackageLicenseInfoFromFiles = []string{}
386*ba677afaSXin Li		for lic := range licsForPackage {
387*ba677afaSXin Li			pkg.PackageLicenseInfoFromFiles = append(pkg.PackageLicenseInfoFromFiles, lic)
388*ba677afaSXin Li		}
389*ba677afaSXin Li		sort.Strings(pkg.PackageLicenseInfoFromFiles)
390*ba677afaSXin Li	}
391*ba677afaSXin Li
392*ba677afaSXin Li	return doc, nil
393*ba677afaSXin Li}
394*ba677afaSXin Li
395*ba677afaSXin Li// ===== Utility functions (not version-specific) =====
396*ba677afaSXin Lifunc searchFileIDs(filePath string) ([]string, error) {
397*ba677afaSXin Li	idsMap := map[string]int{}
398*ba677afaSXin Li	ids := []string{}
399*ba677afaSXin Li
400*ba677afaSXin Li	f, err := os.Open(filePath)
401*ba677afaSXin Li	if err != nil {
402*ba677afaSXin Li		return nil, err
403*ba677afaSXin Li	}
404*ba677afaSXin Li	defer f.Close()
405*ba677afaSXin Li
406*ba677afaSXin Li	scanner := bufio.NewScanner(f)
407*ba677afaSXin Li
408*ba677afaSXin Li	for scanner.Scan() {
409*ba677afaSXin Li		if strings.Contains(scanner.Text(), "SPDX-License-Identifier:") {
410*ba677afaSXin Li			strs := strings.SplitN(scanner.Text(), "SPDX-License-Identifier:", 2)
411*ba677afaSXin Li
412*ba677afaSXin Li			// if prefixed by more than n characters, it's probably not a
413*ba677afaSXin Li			// short-form ID; it's probably code to detect short-form IDs.
414*ba677afaSXin Li			// Like this function itself, for example  =)
415*ba677afaSXin Li			prefix := stripTrash(strs[0])
416*ba677afaSXin Li			if len(prefix) > 5 {
417*ba677afaSXin Li				continue
418*ba677afaSXin Li			}
419*ba677afaSXin Li
420*ba677afaSXin Li			// stop before trailing */ if it is present
421*ba677afaSXin Li			lidToExtract := strs[1]
422*ba677afaSXin Li			lidToExtract = strings.Split(lidToExtract, "*/")[0]
423*ba677afaSXin Li			lid := strings.TrimSpace(lidToExtract)
424*ba677afaSXin Li			lid = stripTrash(lid)
425*ba677afaSXin Li			idsMap[lid] = 1
426*ba677afaSXin Li		}
427*ba677afaSXin Li	}
428*ba677afaSXin Li
429*ba677afaSXin Li	// FIXME for now, ignore scanner errors because we want to return whatever
430*ba677afaSXin Li	// FIXME IDs were in fact found. should probably be changed to either
431*ba677afaSXin Li	// FIXME log the error, and/or be configurable for what should happen.
432*ba677afaSXin Li	// if err = scanner.Err(); err != nil {
433*ba677afaSXin Li	// 	return nil, err
434*ba677afaSXin Li	// }
435*ba677afaSXin Li
436*ba677afaSXin Li	// now, convert map to string
437*ba677afaSXin Li	for lid := range idsMap {
438*ba677afaSXin Li		ids = append(ids, lid)
439*ba677afaSXin Li	}
440*ba677afaSXin Li
441*ba677afaSXin Li	// and sort it
442*ba677afaSXin Li	sort.Strings(ids)
443*ba677afaSXin Li
444*ba677afaSXin Li	return ids, nil
445*ba677afaSXin Li}
446*ba677afaSXin Li
// stripTrashRe matches every run of characters other than word characters,
// whitespace, digits, '.', '-', '+', '(' and ')'. It is compiled once at
// package scope because stripTrash runs for every tagged line scanned.
var stripTrashRe = regexp.MustCompile(`[^\w\s\d.\-\+()]+`)

// stripTrash returns lid with every character matched by stripTrashRe
// removed (comment markers, quotes and similar). Whitespace is kept, so
// callers that need a trimmed result must trim it themselves.
func stripTrash(lid string) string {
	return stripTrashRe.ReplaceAllString(lid, "")
}
451*ba677afaSXin Li
// makeElement prepares a license expression for being joined to others with
// " AND ": an expression containing " AND " or " OR " is wrapped in
// parentheses, and anything else is returned unchanged.
func makeElement(lic string) string {
	if strings.Contains(lic, " AND ") || strings.Contains(lic, " OR ") {
		return "(" + lic + ")"
	}

	return lic
}
459*ba677afaSXin Li
// getIndividualLicenses breaks the license expression lic into its
// individual identifiers and returns them sorted. Parentheses and '+' are
// treated as separators, and the operators AND, OR and WITH (matched
// case-insensitively) are dropped. Duplicates are kept, and the identifier
// following WITH is returned like any other. The result is never nil.
func getIndividualLicenses(lic string) []string {
	// replace parens and '+' with spaces so they separate tokens
	lic = strings.NewReplacer("(", " ", ")", " ", "+", " ").Replace(lic)

	// split on any run of whitespace (tabs included), so no empty or
	// whitespace-padded tokens are produced
	fields := strings.Fields(lic)
	lics := make([]string, 0, len(fields))
	for _, elt := range fields {
		// don't add case-insensitive operators
		if strings.EqualFold(elt, "AND") ||
			strings.EqualFold(elt, "OR") || strings.EqualFold(elt, "WITH") {
			continue
		}

		lics = append(lics, elt)
	}

	// sort before returning
	sort.Strings(lics)
	return lics
}
484