xref: /aosp_15_r20/external/spdx-tools/rdfloader/parser2v2/parser.go (revision ba677afa8f67bb56cbc794f4d0e378e0da058e16)
1// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
2
3package parser2v2
4
5import (
6	"errors"
7	"fmt"
8
9	gordfParser "github.com/spdx/gordf/rdfloader/parser"
10	gordfWriter "github.com/spdx/gordf/rdfwriter"
11	"github.com/spdx/tools-golang/spdx/common"
12	"github.com/spdx/tools-golang/spdx/v2_2"
13)
14
15// returns a new instance of rdfParser2_2 given the gordf object and nodeToTriples mapping
16func NewParser2_2(gordfParserObj *gordfParser.Parser, nodeToTriples map[string][]*gordfParser.Triple) *rdfParser2_2 {
17	parser := rdfParser2_2{
18		gordfParserObj:      gordfParserObj,
19		nodeStringToTriples: nodeToTriples,
20		doc: &v2_2.Document{
21			ExternalDocumentReferences: []v2_2.ExternalDocumentRef{},
22			CreationInfo:               &v2_2.CreationInfo{},
23			Packages:                   []*v2_2.Package{},
24			Files:                      []*v2_2.File{},
25			OtherLicenses:              []*v2_2.OtherLicense{},
26			Relationships:              []*v2_2.Relationship{},
27			Annotations:                []*v2_2.Annotation{},
28			Reviews:                    []*v2_2.Review{},
29		},
30		files:            map[common.ElementID]*v2_2.File{},
31		assocWithPackage: map[common.ElementID]bool{},
32		cache:            map[string]*nodeState{},
33	}
34	return &parser
35}
36
37// main function which takes in a gordfParser and returns
38// a spdxDocument model or the error encountered while parsing it
39func LoadFromGoRDFParser(gordfParserObj *gordfParser.Parser) (*v2_2.Document, error) {
40	// nodeToTriples is a mapping from a node to list of triples.
41	// for every node in the set of subjects of all the triples,
42	// it provides a list of triples that are associated with that subject node.
43	nodeToTriples := gordfWriter.GetNodeToTriples(gordfParserObj.Triples)
44	parser := NewParser2_2(gordfParserObj, nodeToTriples)
45
46	spdxDocumentNode, err := parser.getSpdxDocNode()
47	if err != nil {
48		return nil, err
49	}
50
51	err = parser.parseSpdxDocumentNode(spdxDocumentNode)
52	if err != nil {
53		return nil, err
54	}
55
56	// parsing other root elements
57	for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) {
58		typeTriples := gordfWriter.FilterTriples(gordfParserObj.Triples, &rootNode.ID, &RDF_TYPE, nil)
59		if len(typeTriples) != 1 {
60			return nil, fmt.Errorf("every node must be associated with exactly 1 type Triple. found %d type triples", len(typeTriples))
61		}
62		switch typeTriples[0].Object.ID {
63		case SPDX_SPDX_DOCUMENT_CAPITALIZED:
64			continue // it is already parsed.
65		case SPDX_SNIPPET:
66			snippet, err := parser.getSnippetInformationFromNode2_2(typeTriples[0].Subject)
67			if err != nil {
68				return nil, fmt.Errorf("error parsing a snippet: %v", err)
69			}
70			err = parser.setSnippetToFileWithID(snippet, snippet.SnippetFromFileSPDXIdentifier)
71			if err != nil {
72				return nil, err
73			}
74		// todo: check other root node attributes.
75		default:
76			continue
77			// because in rdf it is quite possible that the root node is an
78			// element that has been used in the some other element as a child
79		}
80	}
81
82	// parsing packages and files sets the files to a files variable which is
83	// associated with the parser and not the document. following method is
84	// necessary to transfer the files which are not set in the packages to the
85	// Files attribute of the document
86	// WARNING: do not relocate following function call. It must be at the end of the function
87	parser.setUnpackagedFiles()
88	return parser.doc, nil
89}
90
91// from the given parser object, returns the SpdxDocument Node defined in the root elements.
92// returns error if the document is associated with no SpdxDocument or
93// associated with more than one SpdxDocument node.
94func (parser *rdfParser2_2) getSpdxDocNode() (node *gordfParser.Node, err error) {
95	/* Possible Questions:
96	1. why are you traversing the root nodes only? why not directly filter out
97	   all the triples with rdf:type=spdx:SpdxDocument?
98	Ans: It is quite possible that the relatedElement or any other attribute
99		 to have dependency of another SpdxDocument. In that case, that
100		 element will reference the dependency using SpdxDocument tag which will
101		 cause false positives when direct filtering is done.
102	*/
103	// iterate over root nodes and find the node which has a property of rdf:type=spdx:SpdxDocument
104	var spdxDocNode *gordfParser.Node
105	for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) {
106		typeTriples := gordfWriter.FilterTriples(
107			parser.nodeToTriples(rootNode), // triples
108			&rootNode.ID,                   // Subject
109			&RDF_TYPE,                      // Predicate
110			nil,                            // Object
111		)
112
113		if typeTriples[0].Object.ID == SPDX_SPDX_DOCUMENT_CAPITALIZED {
114			// we found a SpdxDocument Node
115
116			// must be associated with exactly one rdf:type.
117			if len(typeTriples) != 1 {
118				return nil, fmt.Errorf("rootNode (%v) must be associated with exactly one"+
119					" triple of predicate rdf:type, found %d triples", rootNode, len(typeTriples))
120			}
121
122			// checking if we've already found a node and it is not same as the current one.
123			if spdxDocNode != nil && spdxDocNode.ID != typeTriples[0].Subject.ID {
124				return nil, fmt.Errorf("found more than one SpdxDocument Node (%v and %v)", spdxDocNode, typeTriples[0].Subject)
125			}
126			spdxDocNode = typeTriples[0].Subject
127		}
128	}
129	if spdxDocNode == nil {
130		return nil, errors.New("RDF files must be associated with a SpdxDocument tag. No tag found")
131	}
132	return spdxDocNode, nil
133}
134