1*ba677afaSXin Li// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 2*ba677afaSXin Li 3*ba677afaSXin Lipackage parser2v3 4*ba677afaSXin Li 5*ba677afaSXin Liimport ( 6*ba677afaSXin Li "errors" 7*ba677afaSXin Li "fmt" 8*ba677afaSXin Li 9*ba677afaSXin Li gordfParser "github.com/spdx/gordf/rdfloader/parser" 10*ba677afaSXin Li gordfWriter "github.com/spdx/gordf/rdfwriter" 11*ba677afaSXin Li "github.com/spdx/tools-golang/spdx/common" 12*ba677afaSXin Li "github.com/spdx/tools-golang/spdx/v2_3" 13*ba677afaSXin Li) 14*ba677afaSXin Li 15*ba677afaSXin Li// returns a new instance of rdfParser2_3 given the gordf object and nodeToTriples mapping 16*ba677afaSXin Lifunc NewParser2_3(gordfParserObj *gordfParser.Parser, nodeToTriples map[string][]*gordfParser.Triple) *rdfParser2_3 { 17*ba677afaSXin Li parser := rdfParser2_3{ 18*ba677afaSXin Li gordfParserObj: gordfParserObj, 19*ba677afaSXin Li nodeStringToTriples: nodeToTriples, 20*ba677afaSXin Li doc: &v2_3.Document{ 21*ba677afaSXin Li ExternalDocumentReferences: []v2_3.ExternalDocumentRef{}, 22*ba677afaSXin Li CreationInfo: &v2_3.CreationInfo{}, 23*ba677afaSXin Li Packages: []*v2_3.Package{}, 24*ba677afaSXin Li Files: []*v2_3.File{}, 25*ba677afaSXin Li OtherLicenses: []*v2_3.OtherLicense{}, 26*ba677afaSXin Li Relationships: []*v2_3.Relationship{}, 27*ba677afaSXin Li Annotations: []*v2_3.Annotation{}, 28*ba677afaSXin Li Reviews: []*v2_3.Review{}, 29*ba677afaSXin Li }, 30*ba677afaSXin Li files: map[common.ElementID]*v2_3.File{}, 31*ba677afaSXin Li assocWithPackage: map[common.ElementID]bool{}, 32*ba677afaSXin Li cache: map[string]*nodeState{}, 33*ba677afaSXin Li } 34*ba677afaSXin Li return &parser 35*ba677afaSXin Li} 36*ba677afaSXin Li 37*ba677afaSXin Li// main function which takes in a gordfParser and returns 38*ba677afaSXin Li// a spdxDocument model or the error encountered while parsing it 39*ba677afaSXin Lifunc LoadFromGoRDFParser(gordfParserObj *gordfParser.Parser) (*v2_3.Document, error) { 40*ba677afaSXin Li // nodeToTriples is a mapping from a node to list of triples. 41*ba677afaSXin Li // for every node in the set of subjects of all the triples, 42*ba677afaSXin Li // it provides a list of triples that are associated with that subject node. 43*ba677afaSXin Li nodeToTriples := gordfWriter.GetNodeToTriples(gordfParserObj.Triples) 44*ba677afaSXin Li parser := NewParser2_3(gordfParserObj, nodeToTriples) 45*ba677afaSXin Li 46*ba677afaSXin Li spdxDocumentNode, err := parser.getSpdxDocNode() 47*ba677afaSXin Li if err != nil { 48*ba677afaSXin Li return nil, err 49*ba677afaSXin Li } 50*ba677afaSXin Li 51*ba677afaSXin Li err = parser.parseSpdxDocumentNode(spdxDocumentNode) 52*ba677afaSXin Li if err != nil { 53*ba677afaSXin Li return nil, err 54*ba677afaSXin Li } 55*ba677afaSXin Li 56*ba677afaSXin Li // parsing other root elements 57*ba677afaSXin Li for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) { 58*ba677afaSXin Li typeTriples := gordfWriter.FilterTriples(gordfParserObj.Triples, &rootNode.ID, &RDF_TYPE, nil) 59*ba677afaSXin Li if len(typeTriples) != 1 { 60*ba677afaSXin Li return nil, fmt.Errorf("every node must be associated with exactly 1 type Triple. found %d type triples", len(typeTriples)) 61*ba677afaSXin Li } 62*ba677afaSXin Li switch typeTriples[0].Object.ID { 63*ba677afaSXin Li case SPDX_SPDX_DOCUMENT_CAPITALIZED: 64*ba677afaSXin Li continue // it is already parsed. 65*ba677afaSXin Li case SPDX_SNIPPET: 66*ba677afaSXin Li snippet, err := parser.getSnippetInformationFromNode2_3(typeTriples[0].Subject) 67*ba677afaSXin Li if err != nil { 68*ba677afaSXin Li return nil, fmt.Errorf("error parsing a snippet: %v", err) 69*ba677afaSXin Li } 70*ba677afaSXin Li err = parser.setSnippetToFileWithID(snippet, snippet.SnippetFromFileSPDXIdentifier) 71*ba677afaSXin Li if err != nil { 72*ba677afaSXin Li return nil, err 73*ba677afaSXin Li } 74*ba677afaSXin Li // todo: check other root node attributes. 75*ba677afaSXin Li default: 76*ba677afaSXin Li continue 77*ba677afaSXin Li // because in rdf it is quite possible that the root node is an 78*ba677afaSXin Li // element that has been used in the some other element as a child 79*ba677afaSXin Li } 80*ba677afaSXin Li } 81*ba677afaSXin Li 82*ba677afaSXin Li // parsing packages and files sets the files to a files variable which is 83*ba677afaSXin Li // associated with the parser and not the document. following method is 84*ba677afaSXin Li // necessary to transfer the files which are not set in the packages to the 85*ba677afaSXin Li // Files attribute of the document 86*ba677afaSXin Li // WARNING: do not relocate following function call. It must be at the end of the function 87*ba677afaSXin Li parser.setUnpackagedFiles() 88*ba677afaSXin Li return parser.doc, nil 89*ba677afaSXin Li} 90*ba677afaSXin Li 91*ba677afaSXin Li// from the given parser object, returns the SpdxDocument Node defined in the root elements. 92*ba677afaSXin Li// returns error if the document is associated with no SpdxDocument or 93*ba677afaSXin Li// associated with more than one SpdxDocument node. 94*ba677afaSXin Lifunc (parser *rdfParser2_3) getSpdxDocNode() (node *gordfParser.Node, err error) { 95*ba677afaSXin Li /* Possible Questions: 96*ba677afaSXin Li 1. why are you traversing the root nodes only? why not directly filter out 97*ba677afaSXin Li all the triples with rdf:type=spdx:SpdxDocument? 98*ba677afaSXin Li Ans: It is quite possible that the relatedElement or any other attribute 99*ba677afaSXin Li to have dependency of another SpdxDocument. In that case, that 100*ba677afaSXin Li element will reference the dependency using SpdxDocument tag which will 101*ba677afaSXin Li cause false positives when direct filtering is done. 102*ba677afaSXin Li */ 103*ba677afaSXin Li // iterate over root nodes and find the node which has a property of rdf:type=spdx:SpdxDocument 104*ba677afaSXin Li var spdxDocNode *gordfParser.Node 105*ba677afaSXin Li for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) { 106*ba677afaSXin Li typeTriples := gordfWriter.FilterTriples( 107*ba677afaSXin Li parser.nodeToTriples(rootNode), // triples 108*ba677afaSXin Li &rootNode.ID, // Subject 109*ba677afaSXin Li &RDF_TYPE, // Predicate 110*ba677afaSXin Li nil, // Object 111*ba677afaSXin Li ) 112*ba677afaSXin Li 113*ba677afaSXin Li if typeTriples[0].Object.ID == SPDX_SPDX_DOCUMENT_CAPITALIZED { 114*ba677afaSXin Li // we found a SpdxDocument Node 115*ba677afaSXin Li 116*ba677afaSXin Li // must be associated with exactly one rdf:type. 117*ba677afaSXin Li if len(typeTriples) != 1 { 118*ba677afaSXin Li return nil, fmt.Errorf("rootNode (%v) must be associated with exactly one"+ 119*ba677afaSXin Li " triple of predicate rdf:type, found %d triples", rootNode, len(typeTriples)) 120*ba677afaSXin Li } 121*ba677afaSXin Li 122*ba677afaSXin Li // checking if we've already found a node and it is not same as the current one. 123*ba677afaSXin Li if spdxDocNode != nil && spdxDocNode.ID != typeTriples[0].Subject.ID { 124*ba677afaSXin Li return nil, fmt.Errorf("found more than one SpdxDocument Node (%v and %v)", spdxDocNode, typeTriples[0].Subject) 125*ba677afaSXin Li } 126*ba677afaSXin Li spdxDocNode = typeTriples[0].Subject 127*ba677afaSXin Li } 128*ba677afaSXin Li } 129*ba677afaSXin Li if spdxDocNode == nil { 130*ba677afaSXin Li return nil, errors.New("RDF files must be associated with a SpdxDocument tag. No tag found") 131*ba677afaSXin Li } 132*ba677afaSXin Li return spdxDocNode, nil 133*ba677afaSXin Li} 134