1// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 2 3package parser2v2 4 5import ( 6 "errors" 7 "fmt" 8 9 gordfParser "github.com/spdx/gordf/rdfloader/parser" 10 gordfWriter "github.com/spdx/gordf/rdfwriter" 11 "github.com/spdx/tools-golang/spdx/common" 12 "github.com/spdx/tools-golang/spdx/v2_2" 13) 14 15// returns a new instance of rdfParser2_2 given the gordf object and nodeToTriples mapping 16func NewParser2_2(gordfParserObj *gordfParser.Parser, nodeToTriples map[string][]*gordfParser.Triple) *rdfParser2_2 { 17 parser := rdfParser2_2{ 18 gordfParserObj: gordfParserObj, 19 nodeStringToTriples: nodeToTriples, 20 doc: &v2_2.Document{ 21 ExternalDocumentReferences: []v2_2.ExternalDocumentRef{}, 22 CreationInfo: &v2_2.CreationInfo{}, 23 Packages: []*v2_2.Package{}, 24 Files: []*v2_2.File{}, 25 OtherLicenses: []*v2_2.OtherLicense{}, 26 Relationships: []*v2_2.Relationship{}, 27 Annotations: []*v2_2.Annotation{}, 28 Reviews: []*v2_2.Review{}, 29 }, 30 files: map[common.ElementID]*v2_2.File{}, 31 assocWithPackage: map[common.ElementID]bool{}, 32 cache: map[string]*nodeState{}, 33 } 34 return &parser 35} 36 37// main function which takes in a gordfParser and returns 38// a spdxDocument model or the error encountered while parsing it 39func LoadFromGoRDFParser(gordfParserObj *gordfParser.Parser) (*v2_2.Document, error) { 40 // nodeToTriples is a mapping from a node to list of triples. 41 // for every node in the set of subjects of all the triples, 42 // it provides a list of triples that are associated with that subject node. 43 nodeToTriples := gordfWriter.GetNodeToTriples(gordfParserObj.Triples) 44 parser := NewParser2_2(gordfParserObj, nodeToTriples) 45 46 spdxDocumentNode, err := parser.getSpdxDocNode() 47 if err != nil { 48 return nil, err 49 } 50 51 err = parser.parseSpdxDocumentNode(spdxDocumentNode) 52 if err != nil { 53 return nil, err 54 } 55 56 // parsing other root elements 57 for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) { 58 typeTriples := gordfWriter.FilterTriples(gordfParserObj.Triples, &rootNode.ID, &RDF_TYPE, nil) 59 if len(typeTriples) != 1 { 60 return nil, fmt.Errorf("every node must be associated with exactly 1 type Triple. found %d type triples", len(typeTriples)) 61 } 62 switch typeTriples[0].Object.ID { 63 case SPDX_SPDX_DOCUMENT_CAPITALIZED: 64 continue // it is already parsed. 65 case SPDX_SNIPPET: 66 snippet, err := parser.getSnippetInformationFromNode2_2(typeTriples[0].Subject) 67 if err != nil { 68 return nil, fmt.Errorf("error parsing a snippet: %v", err) 69 } 70 err = parser.setSnippetToFileWithID(snippet, snippet.SnippetFromFileSPDXIdentifier) 71 if err != nil { 72 return nil, err 73 } 74 // todo: check other root node attributes. 75 default: 76 continue 77 // because in rdf it is quite possible that the root node is an 78 // element that has been used in the some other element as a child 79 } 80 } 81 82 // parsing packages and files sets the files to a files variable which is 83 // associated with the parser and not the document. following method is 84 // necessary to transfer the files which are not set in the packages to the 85 // Files attribute of the document 86 // WARNING: do not relocate following function call. It must be at the end of the function 87 parser.setUnpackagedFiles() 88 return parser.doc, nil 89} 90 91// from the given parser object, returns the SpdxDocument Node defined in the root elements. 92// returns error if the document is associated with no SpdxDocument or 93// associated with more than one SpdxDocument node. 94func (parser *rdfParser2_2) getSpdxDocNode() (node *gordfParser.Node, err error) { 95 /* Possible Questions: 96 1. why are you traversing the root nodes only? why not directly filter out 97 all the triples with rdf:type=spdx:SpdxDocument? 98 Ans: It is quite possible that the relatedElement or any other attribute 99 to have dependency of another SpdxDocument. In that case, that 100 element will reference the dependency using SpdxDocument tag which will 101 cause false positives when direct filtering is done. 102 */ 103 // iterate over root nodes and find the node which has a property of rdf:type=spdx:SpdxDocument 104 var spdxDocNode *gordfParser.Node 105 for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) { 106 typeTriples := gordfWriter.FilterTriples( 107 parser.nodeToTriples(rootNode), // triples 108 &rootNode.ID, // Subject 109 &RDF_TYPE, // Predicate 110 nil, // Object 111 ) 112 113 if typeTriples[0].Object.ID == SPDX_SPDX_DOCUMENT_CAPITALIZED { 114 // we found a SpdxDocument Node 115 116 // must be associated with exactly one rdf:type. 117 if len(typeTriples) != 1 { 118 return nil, fmt.Errorf("rootNode (%v) must be associated with exactly one"+ 119 " triple of predicate rdf:type, found %d triples", rootNode, len(typeTriples)) 120 } 121 122 // checking if we've already found a node and it is not same as the current one. 123 if spdxDocNode != nil && spdxDocNode.ID != typeTriples[0].Subject.ID { 124 return nil, fmt.Errorf("found more than one SpdxDocument Node (%v and %v)", spdxDocNode, typeTriples[0].Subject) 125 } 126 spdxDocNode = typeTriples[0].Subject 127 } 128 } 129 if spdxDocNode == nil { 130 return nil, errors.New("RDF files must be associated with a SpdxDocument tag. No tag found") 131 } 132 return spdxDocNode, nil 133} 134