xref: /aosp_15_r20/external/spdx-tools/rdfloader/parser2v3/parse_file.go (revision ba677afa8f67bb56cbc794f4d0e378e0da058e16)
1*ba677afaSXin Li// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
2*ba677afaSXin Li
3*ba677afaSXin Lipackage parser2v3
4*ba677afaSXin Li
5*ba677afaSXin Liimport (
6*ba677afaSXin Li	"fmt"
7*ba677afaSXin Li	"strings"
8*ba677afaSXin Li
9*ba677afaSXin Li	gordfParser "github.com/spdx/gordf/rdfloader/parser"
10*ba677afaSXin Li	"github.com/spdx/tools-golang/spdx/common"
11*ba677afaSXin Li	"github.com/spdx/tools-golang/spdx/v2_3"
12*ba677afaSXin Li)
13*ba677afaSXin Li
14*ba677afaSXin Li// returns a file instance and the error if any encountered.
15*ba677afaSXin Lifunc (parser *rdfParser2_3) getFileFromNode(fileNode *gordfParser.Node) (file *v2_3.File, err error) {
16*ba677afaSXin Li	file = &v2_3.File{}
17*ba677afaSXin Li
18*ba677afaSXin Li	currState := parser.cache[fileNode.ID]
19*ba677afaSXin Li	if currState == nil {
20*ba677afaSXin Li		// this is the first time we are seeing this node.
21*ba677afaSXin Li		parser.cache[fileNode.ID] = &nodeState{
22*ba677afaSXin Li			object: file,
23*ba677afaSXin Li			Color:  WHITE,
24*ba677afaSXin Li		}
25*ba677afaSXin Li	} else if currState.Color == GREY {
26*ba677afaSXin Li		// we have already started parsing this file node and we needn't parse it again.
27*ba677afaSXin Li		return currState.object.(*v2_3.File), nil
28*ba677afaSXin Li	}
29*ba677afaSXin Li
30*ba677afaSXin Li	// setting color to grey to indicate that we've started parsing this node.
31*ba677afaSXin Li	parser.cache[fileNode.ID].Color = GREY
32*ba677afaSXin Li
33*ba677afaSXin Li	// setting color to black just before function returns to the caller to
34*ba677afaSXin Li	// indicate that parsing current node is complete.
35*ba677afaSXin Li	defer func() { parser.cache[fileNode.ID].Color = BLACK }()
36*ba677afaSXin Li
37*ba677afaSXin Li	err = setFileIdentifier(fileNode.ID, file) // 4.2
38*ba677afaSXin Li	if err != nil {
39*ba677afaSXin Li		return nil, err
40*ba677afaSXin Li	}
41*ba677afaSXin Li
42*ba677afaSXin Li	if existingFile := parser.files[file.FileSPDXIdentifier]; existingFile != nil {
43*ba677afaSXin Li		file = existingFile
44*ba677afaSXin Li	}
45*ba677afaSXin Li
46*ba677afaSXin Li	for _, subTriple := range parser.nodeToTriples(fileNode) {
47*ba677afaSXin Li		switch subTriple.Predicate.ID {
48*ba677afaSXin Li		case SPDX_FILE_NAME: // 4.1
49*ba677afaSXin Li			// cardinality: exactly 1
50*ba677afaSXin Li			file.FileName = subTriple.Object.ID
51*ba677afaSXin Li		case SPDX_NAME:
52*ba677afaSXin Li			// cardinality: exactly 1
53*ba677afaSXin Li			// TODO: check where it will be set in the golang-tools spdx-data-model
54*ba677afaSXin Li		case RDF_TYPE:
55*ba677afaSXin Li			// cardinality: exactly 1
56*ba677afaSXin Li		case SPDX_FILE_TYPE: // 4.3
57*ba677afaSXin Li			// cardinality: min 0
58*ba677afaSXin Li			fileType := ""
59*ba677afaSXin Li			fileType, err = parser.getFileTypeFromUri(subTriple.Object.ID)
60*ba677afaSXin Li			file.FileTypes = append(file.FileTypes, fileType)
61*ba677afaSXin Li		case SPDX_CHECKSUM: // 4.4
62*ba677afaSXin Li			// cardinality: min 1
63*ba677afaSXin Li			err = parser.setFileChecksumFromNode(file, subTriple.Object)
64*ba677afaSXin Li		case SPDX_LICENSE_CONCLUDED: // 4.5
65*ba677afaSXin Li			// cardinality: (exactly 1 anyLicenseInfo) or (None) or (Noassertion)
66*ba677afaSXin Li			anyLicense, err := parser.getAnyLicenseFromNode(subTriple.Object)
67*ba677afaSXin Li			if err != nil {
68*ba677afaSXin Li				return nil, fmt.Errorf("error parsing licenseConcluded: %v", err)
69*ba677afaSXin Li			}
70*ba677afaSXin Li			file.LicenseConcluded = anyLicense.ToLicenseString()
71*ba677afaSXin Li		case SPDX_LICENSE_INFO_IN_FILE: // 4.6
72*ba677afaSXin Li			// cardinality: min 1
73*ba677afaSXin Li			lic, err := parser.getAnyLicenseFromNode(subTriple.Object)
74*ba677afaSXin Li			if err != nil {
75*ba677afaSXin Li				return nil, fmt.Errorf("error parsing licenseInfoInFile: %v", err)
76*ba677afaSXin Li			}
77*ba677afaSXin Li			file.LicenseInfoInFiles = append(file.LicenseInfoInFiles, lic.ToLicenseString())
78*ba677afaSXin Li		case SPDX_LICENSE_COMMENTS: // 4.7
79*ba677afaSXin Li			// cardinality: max 1
80*ba677afaSXin Li			file.LicenseComments = subTriple.Object.ID
81*ba677afaSXin Li		// TODO: allow copyright text to be of type NOASSERTION
82*ba677afaSXin Li		case SPDX_COPYRIGHT_TEXT: // 4.8
83*ba677afaSXin Li			// cardinality: exactly 1
84*ba677afaSXin Li			file.FileCopyrightText = subTriple.Object.ID
85*ba677afaSXin Li		case SPDX_LICENSE_INFO_FROM_FILES:
86*ba677afaSXin Li			// TODO: implement it. It is not defined in the tools-golang model.
87*ba677afaSXin Li		// deprecated artifactOf (see sections 4.9, 4.10, 4.11)
88*ba677afaSXin Li		case SPDX_ARTIFACT_OF:
89*ba677afaSXin Li			// cardinality: min 0
90*ba677afaSXin Li			var artifactOf *v2_3.ArtifactOfProject
91*ba677afaSXin Li			artifactOf, err = parser.getArtifactFromNode(subTriple.Object)
92*ba677afaSXin Li			file.ArtifactOfProjects = append(file.ArtifactOfProjects, artifactOf)
93*ba677afaSXin Li		case RDFS_COMMENT: // 4.12
94*ba677afaSXin Li			// cardinality: max 1
95*ba677afaSXin Li			file.FileComment = subTriple.Object.ID
96*ba677afaSXin Li		case SPDX_NOTICE_TEXT: // 4.13
97*ba677afaSXin Li			// cardinality: max 1
98*ba677afaSXin Li			file.FileNotice = getNoticeTextFromNode(subTriple.Object)
99*ba677afaSXin Li		case SPDX_FILE_CONTRIBUTOR: // 4.14
100*ba677afaSXin Li			// cardinality: min 0
101*ba677afaSXin Li			file.FileContributors = append(file.FileContributors, subTriple.Object.ID)
102*ba677afaSXin Li		case SPDX_FILE_DEPENDENCY:
103*ba677afaSXin Li			// cardinality: min 0
104*ba677afaSXin Li			newFile, err := parser.getFileFromNode(subTriple.Object)
105*ba677afaSXin Li			if err != nil {
106*ba677afaSXin Li				return nil, fmt.Errorf("error setting a file dependency in a file: %v", err)
107*ba677afaSXin Li			}
108*ba677afaSXin Li			file.FileDependencies = append(file.FileDependencies, string(newFile.FileSPDXIdentifier))
109*ba677afaSXin Li		case SPDX_ATTRIBUTION_TEXT:
110*ba677afaSXin Li			// cardinality: min 0
111*ba677afaSXin Li			file.FileAttributionTexts = append(file.FileAttributionTexts, subTriple.Object.ID)
112*ba677afaSXin Li		case SPDX_ANNOTATION:
113*ba677afaSXin Li			// cardinality: min 0
114*ba677afaSXin Li			err = parser.parseAnnotationFromNode(subTriple.Object)
115*ba677afaSXin Li		case SPDX_RELATIONSHIP:
116*ba677afaSXin Li			// cardinality: min 0
117*ba677afaSXin Li			err = parser.parseRelationship(subTriple)
118*ba677afaSXin Li		default:
119*ba677afaSXin Li			return nil, fmt.Errorf("unknown triple predicate id %s", subTriple.Predicate.ID)
120*ba677afaSXin Li		}
121*ba677afaSXin Li		if err != nil {
122*ba677afaSXin Li			return nil, err
123*ba677afaSXin Li		}
124*ba677afaSXin Li	}
125*ba677afaSXin Li	parser.files[file.FileSPDXIdentifier] = file
126*ba677afaSXin Li	return file, nil
127*ba677afaSXin Li}
128*ba677afaSXin Li
129*ba677afaSXin Lifunc (parser *rdfParser2_3) setFileChecksumFromNode(file *v2_3.File, checksumNode *gordfParser.Node) error {
130*ba677afaSXin Li	checksumAlgorithm, checksumValue, err := parser.getChecksumFromNode(checksumNode)
131*ba677afaSXin Li	if err != nil {
132*ba677afaSXin Li		return fmt.Errorf("error parsing checksumNode of a file: %v", err)
133*ba677afaSXin Li	}
134*ba677afaSXin Li	if file.Checksums == nil {
135*ba677afaSXin Li		file.Checksums = []common.Checksum{}
136*ba677afaSXin Li	}
137*ba677afaSXin Li	switch checksumAlgorithm {
138*ba677afaSXin Li	case common.SHA1,
139*ba677afaSXin Li		common.SHA224,
140*ba677afaSXin Li		common.SHA256,
141*ba677afaSXin Li		common.SHA384,
142*ba677afaSXin Li		common.SHA512,
143*ba677afaSXin Li		common.MD2,
144*ba677afaSXin Li		common.MD4,
145*ba677afaSXin Li		common.MD5,
146*ba677afaSXin Li		common.MD6,
147*ba677afaSXin Li		common.SHA3_256,
148*ba677afaSXin Li		common.SHA3_384,
149*ba677afaSXin Li		common.SHA3_512,
150*ba677afaSXin Li		common.BLAKE2b_256,
151*ba677afaSXin Li		common.BLAKE2b_384,
152*ba677afaSXin Li		common.BLAKE2b_512,
153*ba677afaSXin Li		common.BLAKE3,
154*ba677afaSXin Li		common.ADLER32:
155*ba677afaSXin Li		file.Checksums = append(file.Checksums, common.Checksum{Algorithm: checksumAlgorithm, Value: checksumValue})
156*ba677afaSXin Li	case "":
157*ba677afaSXin Li		return fmt.Errorf("empty checksum algorithm and value")
158*ba677afaSXin Li	default:
159*ba677afaSXin Li		return fmt.Errorf("unknown checksumAlgorithm %s for a file", checksumAlgorithm)
160*ba677afaSXin Li	}
161*ba677afaSXin Li	return nil
162*ba677afaSXin Li}
163*ba677afaSXin Li
164*ba677afaSXin Lifunc (parser *rdfParser2_3) getArtifactFromNode(node *gordfParser.Node) (*v2_3.ArtifactOfProject, error) {
165*ba677afaSXin Li	artifactOf := &v2_3.ArtifactOfProject{}
166*ba677afaSXin Li	// setting artifactOfProjectURI attribute (which is optional)
167*ba677afaSXin Li	if node.NodeType == gordfParser.IRI {
168*ba677afaSXin Li		artifactOf.URI = node.ID
169*ba677afaSXin Li	}
170*ba677afaSXin Li	// parsing rest triples and attributes of the artifact.
171*ba677afaSXin Li	for _, triple := range parser.nodeToTriples(node) {
172*ba677afaSXin Li		switch triple.Predicate.ID {
173*ba677afaSXin Li		case RDF_TYPE:
174*ba677afaSXin Li		case DOAP_HOMEPAGE:
175*ba677afaSXin Li			artifactOf.HomePage = triple.Object.ID
176*ba677afaSXin Li		case DOAP_NAME:
177*ba677afaSXin Li			artifactOf.Name = triple.Object.ID
178*ba677afaSXin Li		default:
179*ba677afaSXin Li			return nil, fmt.Errorf("error parsing artifactOf predicate %s", triple.Predicate.ID)
180*ba677afaSXin Li		}
181*ba677afaSXin Li	}
182*ba677afaSXin Li	return artifactOf, nil
183*ba677afaSXin Li}
184*ba677afaSXin Li
185*ba677afaSXin Li// TODO: check if the filetype is valid.
186*ba677afaSXin Lifunc (parser *rdfParser2_3) getFileTypeFromUri(uri string) (string, error) {
187*ba677afaSXin Li	// fileType is given as a uri. for example: http://spdx.org/rdf/terms#fileType_text
188*ba677afaSXin Li	lastPart := getLastPartOfURI(uri)
189*ba677afaSXin Li	if !strings.HasPrefix(lastPart, "fileType_") {
190*ba677afaSXin Li		return "", fmt.Errorf("fileType Uri must begin with fileTYpe_. found: %s", lastPart)
191*ba677afaSXin Li	}
192*ba677afaSXin Li	return strings.TrimPrefix(lastPart, "fileType_"), nil
193*ba677afaSXin Li}
194*ba677afaSXin Li
195*ba677afaSXin Li// populates parser.doc.Files by a list of files which are not
196*ba677afaSXin Li// associated with a package by the hasFile attribute
197*ba677afaSXin Li// assumes: all the packages are already parsed.
198*ba677afaSXin Lifunc (parser *rdfParser2_3) setUnpackagedFiles() {
199*ba677afaSXin Li	for fileID := range parser.files {
200*ba677afaSXin Li		if !parser.assocWithPackage[fileID] {
201*ba677afaSXin Li			parser.doc.Files = append(parser.doc.Files, parser.files[fileID])
202*ba677afaSXin Li		}
203*ba677afaSXin Li	}
204*ba677afaSXin Li}
205*ba677afaSXin Li
206*ba677afaSXin Lifunc setFileIdentifier(idURI string, file *v2_3.File) (err error) {
207*ba677afaSXin Li	idURI = strings.TrimSpace(idURI)
208*ba677afaSXin Li	uriFragment := getLastPartOfURI(idURI)
209*ba677afaSXin Li	file.FileSPDXIdentifier, err = ExtractElementID(uriFragment)
210*ba677afaSXin Li	if err != nil {
211*ba677afaSXin Li		return fmt.Errorf("error setting file identifier: %s", err)
212*ba677afaSXin Li	}
213*ba677afaSXin Li	return nil
214*ba677afaSXin Li}
215*ba677afaSXin Li
216*ba677afaSXin Lifunc getNoticeTextFromNode(node *gordfParser.Node) string {
217*ba677afaSXin Li	switch node.ID {
218*ba677afaSXin Li	case SPDX_NOASSERTION_CAPS, SPDX_NOASSERTION_SMALL:
219*ba677afaSXin Li		return "NOASSERTION"
220*ba677afaSXin Li	default:
221*ba677afaSXin Li		return node.ID
222*ba677afaSXin Li	}
223*ba677afaSXin Li}
224