xref: /aosp_15_r20/external/spdx-tools/rdfloader/parser2v2/parse_file.go (revision ba677afa8f67bb56cbc794f4d0e378e0da058e16)
1// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
2
3package parser2v2
4
5import (
6	"fmt"
7	"strings"
8
9	gordfParser "github.com/spdx/gordf/rdfloader/parser"
10	"github.com/spdx/tools-golang/spdx/common"
11	"github.com/spdx/tools-golang/spdx/v2_2"
12)
13
14// returns a file instance and the error if any encountered.
15func (parser *rdfParser2_2) getFileFromNode(fileNode *gordfParser.Node) (file *v2_2.File, err error) {
16	file = &v2_2.File{}
17
18	currState := parser.cache[fileNode.ID]
19	if currState == nil {
20		// this is the first time we are seeing this node.
21		parser.cache[fileNode.ID] = &nodeState{
22			object: file,
23			Color:  WHITE,
24		}
25	} else if currState.Color == GREY {
26		// we have already started parsing this file node and we needn't parse it again.
27		return currState.object.(*v2_2.File), nil
28	}
29
30	// setting color to grey to indicate that we've started parsing this node.
31	parser.cache[fileNode.ID].Color = GREY
32
33	// setting color to black just before function returns to the caller to
34	// indicate that parsing current node is complete.
35	defer func() { parser.cache[fileNode.ID].Color = BLACK }()
36
37	err = setFileIdentifier(fileNode.ID, file) // 4.2
38	if err != nil {
39		return nil, err
40	}
41
42	if existingFile := parser.files[file.FileSPDXIdentifier]; existingFile != nil {
43		file = existingFile
44	}
45
46	for _, subTriple := range parser.nodeToTriples(fileNode) {
47		switch subTriple.Predicate.ID {
48		case SPDX_FILE_NAME: // 4.1
49			// cardinality: exactly 1
50			file.FileName = subTriple.Object.ID
51		case SPDX_NAME:
52			// cardinality: exactly 1
53			// TODO: check where it will be set in the golang-tools spdx-data-model
54		case RDF_TYPE:
55			// cardinality: exactly 1
56		case SPDX_FILE_TYPE: // 4.3
57			// cardinality: min 0
58			fileType := ""
59			fileType, err = parser.getFileTypeFromUri(subTriple.Object.ID)
60			file.FileTypes = append(file.FileTypes, fileType)
61		case SPDX_CHECKSUM: // 4.4
62			// cardinality: min 1
63			err = parser.setFileChecksumFromNode(file, subTriple.Object)
64		case SPDX_LICENSE_CONCLUDED: // 4.5
65			// cardinality: (exactly 1 anyLicenseInfo) or (None) or (Noassertion)
66			anyLicense, err := parser.getAnyLicenseFromNode(subTriple.Object)
67			if err != nil {
68				return nil, fmt.Errorf("error parsing licenseConcluded: %v", err)
69			}
70			file.LicenseConcluded = anyLicense.ToLicenseString()
71		case SPDX_LICENSE_INFO_IN_FILE: // 4.6
72			// cardinality: min 1
73			lic, err := parser.getAnyLicenseFromNode(subTriple.Object)
74			if err != nil {
75				return nil, fmt.Errorf("error parsing licenseInfoInFile: %v", err)
76			}
77			file.LicenseInfoInFiles = append(file.LicenseInfoInFiles, lic.ToLicenseString())
78		case SPDX_LICENSE_COMMENTS: // 4.7
79			// cardinality: max 1
80			file.LicenseComments = subTriple.Object.ID
81		// TODO: allow copyright text to be of type NOASSERTION
82		case SPDX_COPYRIGHT_TEXT: // 4.8
83			// cardinality: exactly 1
84			file.FileCopyrightText = subTriple.Object.ID
85		case SPDX_LICENSE_INFO_FROM_FILES:
86			// TODO: implement it. It is not defined in the tools-golang model.
87		// deprecated artifactOf (see sections 4.9, 4.10, 4.11)
88		case SPDX_ARTIFACT_OF:
89			// cardinality: min 0
90			var artifactOf *v2_2.ArtifactOfProject
91			artifactOf, err = parser.getArtifactFromNode(subTriple.Object)
92			file.ArtifactOfProjects = append(file.ArtifactOfProjects, artifactOf)
93		case RDFS_COMMENT: // 4.12
94			// cardinality: max 1
95			file.FileComment = subTriple.Object.ID
96		case SPDX_NOTICE_TEXT: // 4.13
97			// cardinality: max 1
98			file.FileNotice = getNoticeTextFromNode(subTriple.Object)
99		case SPDX_FILE_CONTRIBUTOR: // 4.14
100			// cardinality: min 0
101			file.FileContributors = append(file.FileContributors, subTriple.Object.ID)
102		case SPDX_FILE_DEPENDENCY:
103			// cardinality: min 0
104			newFile, err := parser.getFileFromNode(subTriple.Object)
105			if err != nil {
106				return nil, fmt.Errorf("error setting a file dependency in a file: %v", err)
107			}
108			file.FileDependencies = append(file.FileDependencies, string(newFile.FileSPDXIdentifier))
109		case SPDX_ATTRIBUTION_TEXT:
110			// cardinality: min 0
111			file.FileAttributionTexts = append(file.FileAttributionTexts, subTriple.Object.ID)
112		case SPDX_ANNOTATION:
113			// cardinality: min 0
114			err = parser.parseAnnotationFromNode(subTriple.Object)
115		case SPDX_RELATIONSHIP:
116			// cardinality: min 0
117			err = parser.parseRelationship(subTriple)
118		default:
119			return nil, fmt.Errorf("unknown triple predicate id %s", subTriple.Predicate.ID)
120		}
121		if err != nil {
122			return nil, err
123		}
124	}
125	parser.files[file.FileSPDXIdentifier] = file
126	return file, nil
127}
128
129func (parser *rdfParser2_2) setFileChecksumFromNode(file *v2_2.File, checksumNode *gordfParser.Node) error {
130	checksumAlgorithm, checksumValue, err := parser.getChecksumFromNode(checksumNode)
131	if err != nil {
132		return fmt.Errorf("error parsing checksumNode of a file: %v", err)
133	}
134	if file.Checksums == nil {
135		file.Checksums = []common.Checksum{}
136	}
137	switch checksumAlgorithm {
138	case common.SHA1,
139		common.SHA224,
140		common.SHA256,
141		common.SHA384,
142		common.SHA512,
143		common.MD2,
144		common.MD4,
145		common.MD5,
146		common.MD6:
147		file.Checksums = append(file.Checksums, common.Checksum{Algorithm: checksumAlgorithm, Value: checksumValue})
148	case "":
149		return fmt.Errorf("empty checksum algorithm and value")
150	default:
151		return fmt.Errorf("unknown checksumAlgorithm %s for a file", checksumAlgorithm)
152	}
153	return nil
154}
155
156func (parser *rdfParser2_2) getArtifactFromNode(node *gordfParser.Node) (*v2_2.ArtifactOfProject, error) {
157	artifactOf := &v2_2.ArtifactOfProject{}
158	// setting artifactOfProjectURI attribute (which is optional)
159	if node.NodeType == gordfParser.IRI {
160		artifactOf.URI = node.ID
161	}
162	// parsing rest triples and attributes of the artifact.
163	for _, triple := range parser.nodeToTriples(node) {
164		switch triple.Predicate.ID {
165		case RDF_TYPE:
166		case DOAP_HOMEPAGE:
167			artifactOf.HomePage = triple.Object.ID
168		case DOAP_NAME:
169			artifactOf.Name = triple.Object.ID
170		default:
171			return nil, fmt.Errorf("error parsing artifactOf predicate %s", triple.Predicate.ID)
172		}
173	}
174	return artifactOf, nil
175}
176
177// TODO: check if the filetype is valid.
178func (parser *rdfParser2_2) getFileTypeFromUri(uri string) (string, error) {
179	// fileType is given as a uri. for example: http://spdx.org/rdf/terms#fileType_text
180	lastPart := getLastPartOfURI(uri)
181	if !strings.HasPrefix(lastPart, "fileType_") {
182		return "", fmt.Errorf("fileType Uri must begin with fileTYpe_. found: %s", lastPart)
183	}
184	return strings.TrimPrefix(lastPart, "fileType_"), nil
185}
186
187// populates parser.doc.Files by a list of files which are not
188// associated with a package by the hasFile attribute
189// assumes: all the packages are already parsed.
190func (parser *rdfParser2_2) setUnpackagedFiles() {
191	for fileID := range parser.files {
192		if !parser.assocWithPackage[fileID] {
193			parser.doc.Files = append(parser.doc.Files, parser.files[fileID])
194		}
195	}
196}
197
198func setFileIdentifier(idURI string, file *v2_2.File) (err error) {
199	idURI = strings.TrimSpace(idURI)
200	uriFragment := getLastPartOfURI(idURI)
201	file.FileSPDXIdentifier, err = ExtractElementID(uriFragment)
202	if err != nil {
203		return fmt.Errorf("error setting file identifier: %s", err)
204	}
205	return nil
206}
207
208func getNoticeTextFromNode(node *gordfParser.Node) string {
209	switch node.ID {
210	case SPDX_NOASSERTION_CAPS, SPDX_NOASSERTION_SMALL:
211		return "NOASSERTION"
212	default:
213		return node.ID
214	}
215}
216