xref: /aosp_15_r20/external/spdx-tools/rdfloader/parser2v3/parser.go (revision ba677afa8f67bb56cbc794f4d0e378e0da058e16)
1*ba677afaSXin Li// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
2*ba677afaSXin Li
3*ba677afaSXin Lipackage parser2v3
4*ba677afaSXin Li
5*ba677afaSXin Liimport (
6*ba677afaSXin Li	"errors"
7*ba677afaSXin Li	"fmt"
8*ba677afaSXin Li
9*ba677afaSXin Li	gordfParser "github.com/spdx/gordf/rdfloader/parser"
10*ba677afaSXin Li	gordfWriter "github.com/spdx/gordf/rdfwriter"
11*ba677afaSXin Li	"github.com/spdx/tools-golang/spdx/common"
12*ba677afaSXin Li	"github.com/spdx/tools-golang/spdx/v2_3"
13*ba677afaSXin Li)
14*ba677afaSXin Li
15*ba677afaSXin Li// returns a new instance of rdfParser2_3 given the gordf object and nodeToTriples mapping
16*ba677afaSXin Lifunc NewParser2_3(gordfParserObj *gordfParser.Parser, nodeToTriples map[string][]*gordfParser.Triple) *rdfParser2_3 {
17*ba677afaSXin Li	parser := rdfParser2_3{
18*ba677afaSXin Li		gordfParserObj:      gordfParserObj,
19*ba677afaSXin Li		nodeStringToTriples: nodeToTriples,
20*ba677afaSXin Li		doc: &v2_3.Document{
21*ba677afaSXin Li			ExternalDocumentReferences: []v2_3.ExternalDocumentRef{},
22*ba677afaSXin Li			CreationInfo:               &v2_3.CreationInfo{},
23*ba677afaSXin Li			Packages:                   []*v2_3.Package{},
24*ba677afaSXin Li			Files:                      []*v2_3.File{},
25*ba677afaSXin Li			OtherLicenses:              []*v2_3.OtherLicense{},
26*ba677afaSXin Li			Relationships:              []*v2_3.Relationship{},
27*ba677afaSXin Li			Annotations:                []*v2_3.Annotation{},
28*ba677afaSXin Li			Reviews:                    []*v2_3.Review{},
29*ba677afaSXin Li		},
30*ba677afaSXin Li		files:            map[common.ElementID]*v2_3.File{},
31*ba677afaSXin Li		assocWithPackage: map[common.ElementID]bool{},
32*ba677afaSXin Li		cache:            map[string]*nodeState{},
33*ba677afaSXin Li	}
34*ba677afaSXin Li	return &parser
35*ba677afaSXin Li}
36*ba677afaSXin Li
37*ba677afaSXin Li// main function which takes in a gordfParser and returns
38*ba677afaSXin Li// a spdxDocument model or the error encountered while parsing it
39*ba677afaSXin Lifunc LoadFromGoRDFParser(gordfParserObj *gordfParser.Parser) (*v2_3.Document, error) {
40*ba677afaSXin Li	// nodeToTriples is a mapping from a node to list of triples.
41*ba677afaSXin Li	// for every node in the set of subjects of all the triples,
42*ba677afaSXin Li	// it provides a list of triples that are associated with that subject node.
43*ba677afaSXin Li	nodeToTriples := gordfWriter.GetNodeToTriples(gordfParserObj.Triples)
44*ba677afaSXin Li	parser := NewParser2_3(gordfParserObj, nodeToTriples)
45*ba677afaSXin Li
46*ba677afaSXin Li	spdxDocumentNode, err := parser.getSpdxDocNode()
47*ba677afaSXin Li	if err != nil {
48*ba677afaSXin Li		return nil, err
49*ba677afaSXin Li	}
50*ba677afaSXin Li
51*ba677afaSXin Li	err = parser.parseSpdxDocumentNode(spdxDocumentNode)
52*ba677afaSXin Li	if err != nil {
53*ba677afaSXin Li		return nil, err
54*ba677afaSXin Li	}
55*ba677afaSXin Li
56*ba677afaSXin Li	// parsing other root elements
57*ba677afaSXin Li	for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) {
58*ba677afaSXin Li		typeTriples := gordfWriter.FilterTriples(gordfParserObj.Triples, &rootNode.ID, &RDF_TYPE, nil)
59*ba677afaSXin Li		if len(typeTriples) != 1 {
60*ba677afaSXin Li			return nil, fmt.Errorf("every node must be associated with exactly 1 type Triple. found %d type triples", len(typeTriples))
61*ba677afaSXin Li		}
62*ba677afaSXin Li		switch typeTriples[0].Object.ID {
63*ba677afaSXin Li		case SPDX_SPDX_DOCUMENT_CAPITALIZED:
64*ba677afaSXin Li			continue // it is already parsed.
65*ba677afaSXin Li		case SPDX_SNIPPET:
66*ba677afaSXin Li			snippet, err := parser.getSnippetInformationFromNode2_3(typeTriples[0].Subject)
67*ba677afaSXin Li			if err != nil {
68*ba677afaSXin Li				return nil, fmt.Errorf("error parsing a snippet: %v", err)
69*ba677afaSXin Li			}
70*ba677afaSXin Li			err = parser.setSnippetToFileWithID(snippet, snippet.SnippetFromFileSPDXIdentifier)
71*ba677afaSXin Li			if err != nil {
72*ba677afaSXin Li				return nil, err
73*ba677afaSXin Li			}
74*ba677afaSXin Li		// todo: check other root node attributes.
75*ba677afaSXin Li		default:
76*ba677afaSXin Li			continue
77*ba677afaSXin Li			// because in rdf it is quite possible that the root node is an
78*ba677afaSXin Li			// element that has been used in the some other element as a child
79*ba677afaSXin Li		}
80*ba677afaSXin Li	}
81*ba677afaSXin Li
82*ba677afaSXin Li	// parsing packages and files sets the files to a files variable which is
83*ba677afaSXin Li	// associated with the parser and not the document. following method is
84*ba677afaSXin Li	// necessary to transfer the files which are not set in the packages to the
85*ba677afaSXin Li	// Files attribute of the document
86*ba677afaSXin Li	// WARNING: do not relocate following function call. It must be at the end of the function
87*ba677afaSXin Li	parser.setUnpackagedFiles()
88*ba677afaSXin Li	return parser.doc, nil
89*ba677afaSXin Li}
90*ba677afaSXin Li
91*ba677afaSXin Li// from the given parser object, returns the SpdxDocument Node defined in the root elements.
92*ba677afaSXin Li// returns error if the document is associated with no SpdxDocument or
93*ba677afaSXin Li// associated with more than one SpdxDocument node.
94*ba677afaSXin Lifunc (parser *rdfParser2_3) getSpdxDocNode() (node *gordfParser.Node, err error) {
95*ba677afaSXin Li	/* Possible Questions:
96*ba677afaSXin Li	1. why are you traversing the root nodes only? why not directly filter out
97*ba677afaSXin Li	   all the triples with rdf:type=spdx:SpdxDocument?
98*ba677afaSXin Li	Ans: It is quite possible that the relatedElement or any other attribute
99*ba677afaSXin Li		 to have dependency of another SpdxDocument. In that case, that
100*ba677afaSXin Li		 element will reference the dependency using SpdxDocument tag which will
101*ba677afaSXin Li		 cause false positives when direct filtering is done.
102*ba677afaSXin Li	*/
103*ba677afaSXin Li	// iterate over root nodes and find the node which has a property of rdf:type=spdx:SpdxDocument
104*ba677afaSXin Li	var spdxDocNode *gordfParser.Node
105*ba677afaSXin Li	for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) {
106*ba677afaSXin Li		typeTriples := gordfWriter.FilterTriples(
107*ba677afaSXin Li			parser.nodeToTriples(rootNode), // triples
108*ba677afaSXin Li			&rootNode.ID,                   // Subject
109*ba677afaSXin Li			&RDF_TYPE,                      // Predicate
110*ba677afaSXin Li			nil,                            // Object
111*ba677afaSXin Li		)
112*ba677afaSXin Li
113*ba677afaSXin Li		if typeTriples[0].Object.ID == SPDX_SPDX_DOCUMENT_CAPITALIZED {
114*ba677afaSXin Li			// we found a SpdxDocument Node
115*ba677afaSXin Li
116*ba677afaSXin Li			// must be associated with exactly one rdf:type.
117*ba677afaSXin Li			if len(typeTriples) != 1 {
118*ba677afaSXin Li				return nil, fmt.Errorf("rootNode (%v) must be associated with exactly one"+
119*ba677afaSXin Li					" triple of predicate rdf:type, found %d triples", rootNode, len(typeTriples))
120*ba677afaSXin Li			}
121*ba677afaSXin Li
122*ba677afaSXin Li			// checking if we've already found a node and it is not same as the current one.
123*ba677afaSXin Li			if spdxDocNode != nil && spdxDocNode.ID != typeTriples[0].Subject.ID {
124*ba677afaSXin Li				return nil, fmt.Errorf("found more than one SpdxDocument Node (%v and %v)", spdxDocNode, typeTriples[0].Subject)
125*ba677afaSXin Li			}
126*ba677afaSXin Li			spdxDocNode = typeTriples[0].Subject
127*ba677afaSXin Li		}
128*ba677afaSXin Li	}
129*ba677afaSXin Li	if spdxDocNode == nil {
130*ba677afaSXin Li		return nil, errors.New("RDF files must be associated with a SpdxDocument tag. No tag found")
131*ba677afaSXin Li	}
132*ba677afaSXin Li	return spdxDocNode, nil
133*ba677afaSXin Li}
134