xref: /aosp_15_r20/external/spdx-tools/rdfloader/parser2v3/parse_file.go (revision ba677afa8f67bb56cbc794f4d0e378e0da058e16)
1// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
2
3package parser2v3
4
import (
	"fmt"
	"sort"
	"strings"

	gordfParser "github.com/spdx/gordf/rdfloader/parser"
	"github.com/spdx/tools-golang/spdx/common"
	"github.com/spdx/tools-golang/spdx/v2_3"
)
13
14// returns a file instance and the error if any encountered.
15func (parser *rdfParser2_3) getFileFromNode(fileNode *gordfParser.Node) (file *v2_3.File, err error) {
16	file = &v2_3.File{}
17
18	currState := parser.cache[fileNode.ID]
19	if currState == nil {
20		// this is the first time we are seeing this node.
21		parser.cache[fileNode.ID] = &nodeState{
22			object: file,
23			Color:  WHITE,
24		}
25	} else if currState.Color == GREY {
26		// we have already started parsing this file node and we needn't parse it again.
27		return currState.object.(*v2_3.File), nil
28	}
29
30	// setting color to grey to indicate that we've started parsing this node.
31	parser.cache[fileNode.ID].Color = GREY
32
33	// setting color to black just before function returns to the caller to
34	// indicate that parsing current node is complete.
35	defer func() { parser.cache[fileNode.ID].Color = BLACK }()
36
37	err = setFileIdentifier(fileNode.ID, file) // 4.2
38	if err != nil {
39		return nil, err
40	}
41
42	if existingFile := parser.files[file.FileSPDXIdentifier]; existingFile != nil {
43		file = existingFile
44	}
45
46	for _, subTriple := range parser.nodeToTriples(fileNode) {
47		switch subTriple.Predicate.ID {
48		case SPDX_FILE_NAME: // 4.1
49			// cardinality: exactly 1
50			file.FileName = subTriple.Object.ID
51		case SPDX_NAME:
52			// cardinality: exactly 1
53			// TODO: check where it will be set in the golang-tools spdx-data-model
54		case RDF_TYPE:
55			// cardinality: exactly 1
56		case SPDX_FILE_TYPE: // 4.3
57			// cardinality: min 0
58			fileType := ""
59			fileType, err = parser.getFileTypeFromUri(subTriple.Object.ID)
60			file.FileTypes = append(file.FileTypes, fileType)
61		case SPDX_CHECKSUM: // 4.4
62			// cardinality: min 1
63			err = parser.setFileChecksumFromNode(file, subTriple.Object)
64		case SPDX_LICENSE_CONCLUDED: // 4.5
65			// cardinality: (exactly 1 anyLicenseInfo) or (None) or (Noassertion)
66			anyLicense, err := parser.getAnyLicenseFromNode(subTriple.Object)
67			if err != nil {
68				return nil, fmt.Errorf("error parsing licenseConcluded: %v", err)
69			}
70			file.LicenseConcluded = anyLicense.ToLicenseString()
71		case SPDX_LICENSE_INFO_IN_FILE: // 4.6
72			// cardinality: min 1
73			lic, err := parser.getAnyLicenseFromNode(subTriple.Object)
74			if err != nil {
75				return nil, fmt.Errorf("error parsing licenseInfoInFile: %v", err)
76			}
77			file.LicenseInfoInFiles = append(file.LicenseInfoInFiles, lic.ToLicenseString())
78		case SPDX_LICENSE_COMMENTS: // 4.7
79			// cardinality: max 1
80			file.LicenseComments = subTriple.Object.ID
81		// TODO: allow copyright text to be of type NOASSERTION
82		case SPDX_COPYRIGHT_TEXT: // 4.8
83			// cardinality: exactly 1
84			file.FileCopyrightText = subTriple.Object.ID
85		case SPDX_LICENSE_INFO_FROM_FILES:
86			// TODO: implement it. It is not defined in the tools-golang model.
87		// deprecated artifactOf (see sections 4.9, 4.10, 4.11)
88		case SPDX_ARTIFACT_OF:
89			// cardinality: min 0
90			var artifactOf *v2_3.ArtifactOfProject
91			artifactOf, err = parser.getArtifactFromNode(subTriple.Object)
92			file.ArtifactOfProjects = append(file.ArtifactOfProjects, artifactOf)
93		case RDFS_COMMENT: // 4.12
94			// cardinality: max 1
95			file.FileComment = subTriple.Object.ID
96		case SPDX_NOTICE_TEXT: // 4.13
97			// cardinality: max 1
98			file.FileNotice = getNoticeTextFromNode(subTriple.Object)
99		case SPDX_FILE_CONTRIBUTOR: // 4.14
100			// cardinality: min 0
101			file.FileContributors = append(file.FileContributors, subTriple.Object.ID)
102		case SPDX_FILE_DEPENDENCY:
103			// cardinality: min 0
104			newFile, err := parser.getFileFromNode(subTriple.Object)
105			if err != nil {
106				return nil, fmt.Errorf("error setting a file dependency in a file: %v", err)
107			}
108			file.FileDependencies = append(file.FileDependencies, string(newFile.FileSPDXIdentifier))
109		case SPDX_ATTRIBUTION_TEXT:
110			// cardinality: min 0
111			file.FileAttributionTexts = append(file.FileAttributionTexts, subTriple.Object.ID)
112		case SPDX_ANNOTATION:
113			// cardinality: min 0
114			err = parser.parseAnnotationFromNode(subTriple.Object)
115		case SPDX_RELATIONSHIP:
116			// cardinality: min 0
117			err = parser.parseRelationship(subTriple)
118		default:
119			return nil, fmt.Errorf("unknown triple predicate id %s", subTriple.Predicate.ID)
120		}
121		if err != nil {
122			return nil, err
123		}
124	}
125	parser.files[file.FileSPDXIdentifier] = file
126	return file, nil
127}
128
129func (parser *rdfParser2_3) setFileChecksumFromNode(file *v2_3.File, checksumNode *gordfParser.Node) error {
130	checksumAlgorithm, checksumValue, err := parser.getChecksumFromNode(checksumNode)
131	if err != nil {
132		return fmt.Errorf("error parsing checksumNode of a file: %v", err)
133	}
134	if file.Checksums == nil {
135		file.Checksums = []common.Checksum{}
136	}
137	switch checksumAlgorithm {
138	case common.SHA1,
139		common.SHA224,
140		common.SHA256,
141		common.SHA384,
142		common.SHA512,
143		common.MD2,
144		common.MD4,
145		common.MD5,
146		common.MD6,
147		common.SHA3_256,
148		common.SHA3_384,
149		common.SHA3_512,
150		common.BLAKE2b_256,
151		common.BLAKE2b_384,
152		common.BLAKE2b_512,
153		common.BLAKE3,
154		common.ADLER32:
155		file.Checksums = append(file.Checksums, common.Checksum{Algorithm: checksumAlgorithm, Value: checksumValue})
156	case "":
157		return fmt.Errorf("empty checksum algorithm and value")
158	default:
159		return fmt.Errorf("unknown checksumAlgorithm %s for a file", checksumAlgorithm)
160	}
161	return nil
162}
163
164func (parser *rdfParser2_3) getArtifactFromNode(node *gordfParser.Node) (*v2_3.ArtifactOfProject, error) {
165	artifactOf := &v2_3.ArtifactOfProject{}
166	// setting artifactOfProjectURI attribute (which is optional)
167	if node.NodeType == gordfParser.IRI {
168		artifactOf.URI = node.ID
169	}
170	// parsing rest triples and attributes of the artifact.
171	for _, triple := range parser.nodeToTriples(node) {
172		switch triple.Predicate.ID {
173		case RDF_TYPE:
174		case DOAP_HOMEPAGE:
175			artifactOf.HomePage = triple.Object.ID
176		case DOAP_NAME:
177			artifactOf.Name = triple.Object.ID
178		default:
179			return nil, fmt.Errorf("error parsing artifactOf predicate %s", triple.Predicate.ID)
180		}
181	}
182	return artifactOf, nil
183}
184
185// TODO: check if the filetype is valid.
186func (parser *rdfParser2_3) getFileTypeFromUri(uri string) (string, error) {
187	// fileType is given as a uri. for example: http://spdx.org/rdf/terms#fileType_text
188	lastPart := getLastPartOfURI(uri)
189	if !strings.HasPrefix(lastPart, "fileType_") {
190		return "", fmt.Errorf("fileType Uri must begin with fileTYpe_. found: %s", lastPart)
191	}
192	return strings.TrimPrefix(lastPart, "fileType_"), nil
193}
194
195// populates parser.doc.Files by a list of files which are not
196// associated with a package by the hasFile attribute
197// assumes: all the packages are already parsed.
198func (parser *rdfParser2_3) setUnpackagedFiles() {
199	for fileID := range parser.files {
200		if !parser.assocWithPackage[fileID] {
201			parser.doc.Files = append(parser.doc.Files, parser.files[fileID])
202		}
203	}
204}
205
206func setFileIdentifier(idURI string, file *v2_3.File) (err error) {
207	idURI = strings.TrimSpace(idURI)
208	uriFragment := getLastPartOfURI(idURI)
209	file.FileSPDXIdentifier, err = ExtractElementID(uriFragment)
210	if err != nil {
211		return fmt.Errorf("error setting file identifier: %s", err)
212	}
213	return nil
214}
215
216func getNoticeTextFromNode(node *gordfParser.Node) string {
217	switch node.ID {
218	case SPDX_NOASSERTION_CAPS, SPDX_NOASSERTION_SMALL:
219		return "NOASSERTION"
220	default:
221		return node.ID
222	}
223}
224