1*ba677afaSXin Li// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 2*ba677afaSXin Li 3*ba677afaSXin Lipackage parser2v3 4*ba677afaSXin Li 5*ba677afaSXin Liimport ( 6*ba677afaSXin Li "fmt" 7*ba677afaSXin Li "strings" 8*ba677afaSXin Li 9*ba677afaSXin Li gordfParser "github.com/spdx/gordf/rdfloader/parser" 10*ba677afaSXin Li "github.com/spdx/tools-golang/spdx/common" 11*ba677afaSXin Li "github.com/spdx/tools-golang/spdx/v2_3" 12*ba677afaSXin Li) 13*ba677afaSXin Li 14*ba677afaSXin Li// returns a file instance and the error if any encountered. 15*ba677afaSXin Lifunc (parser *rdfParser2_3) getFileFromNode(fileNode *gordfParser.Node) (file *v2_3.File, err error) { 16*ba677afaSXin Li file = &v2_3.File{} 17*ba677afaSXin Li 18*ba677afaSXin Li currState := parser.cache[fileNode.ID] 19*ba677afaSXin Li if currState == nil { 20*ba677afaSXin Li // this is the first time we are seeing this node. 21*ba677afaSXin Li parser.cache[fileNode.ID] = &nodeState{ 22*ba677afaSXin Li object: file, 23*ba677afaSXin Li Color: WHITE, 24*ba677afaSXin Li } 25*ba677afaSXin Li } else if currState.Color == GREY { 26*ba677afaSXin Li // we have already started parsing this file node and we needn't parse it again. 27*ba677afaSXin Li return currState.object.(*v2_3.File), nil 28*ba677afaSXin Li } 29*ba677afaSXin Li 30*ba677afaSXin Li // setting color to grey to indicate that we've started parsing this node. 31*ba677afaSXin Li parser.cache[fileNode.ID].Color = GREY 32*ba677afaSXin Li 33*ba677afaSXin Li // setting color to black just before function returns to the caller to 34*ba677afaSXin Li // indicate that parsing current node is complete. 35*ba677afaSXin Li defer func() { parser.cache[fileNode.ID].Color = BLACK }() 36*ba677afaSXin Li 37*ba677afaSXin Li err = setFileIdentifier(fileNode.ID, file) // 4.2 38*ba677afaSXin Li if err != nil { 39*ba677afaSXin Li return nil, err 40*ba677afaSXin Li } 41*ba677afaSXin Li 42*ba677afaSXin Li if existingFile := parser.files[file.FileSPDXIdentifier]; existingFile != nil { 43*ba677afaSXin Li file = existingFile 44*ba677afaSXin Li } 45*ba677afaSXin Li 46*ba677afaSXin Li for _, subTriple := range parser.nodeToTriples(fileNode) { 47*ba677afaSXin Li switch subTriple.Predicate.ID { 48*ba677afaSXin Li case SPDX_FILE_NAME: // 4.1 49*ba677afaSXin Li // cardinality: exactly 1 50*ba677afaSXin Li file.FileName = subTriple.Object.ID 51*ba677afaSXin Li case SPDX_NAME: 52*ba677afaSXin Li // cardinality: exactly 1 53*ba677afaSXin Li // TODO: check where it will be set in the golang-tools spdx-data-model 54*ba677afaSXin Li case RDF_TYPE: 55*ba677afaSXin Li // cardinality: exactly 1 56*ba677afaSXin Li case SPDX_FILE_TYPE: // 4.3 57*ba677afaSXin Li // cardinality: min 0 58*ba677afaSXin Li fileType := "" 59*ba677afaSXin Li fileType, err = parser.getFileTypeFromUri(subTriple.Object.ID) 60*ba677afaSXin Li file.FileTypes = append(file.FileTypes, fileType) 61*ba677afaSXin Li case SPDX_CHECKSUM: // 4.4 62*ba677afaSXin Li // cardinality: min 1 63*ba677afaSXin Li err = parser.setFileChecksumFromNode(file, subTriple.Object) 64*ba677afaSXin Li case SPDX_LICENSE_CONCLUDED: // 4.5 65*ba677afaSXin Li // cardinality: (exactly 1 anyLicenseInfo) or (None) or (Noassertion) 66*ba677afaSXin Li anyLicense, err := parser.getAnyLicenseFromNode(subTriple.Object) 67*ba677afaSXin Li if err != nil { 68*ba677afaSXin Li return nil, fmt.Errorf("error parsing licenseConcluded: %v", err) 69*ba677afaSXin Li } 70*ba677afaSXin Li file.LicenseConcluded = anyLicense.ToLicenseString() 71*ba677afaSXin Li case SPDX_LICENSE_INFO_IN_FILE: // 4.6 72*ba677afaSXin Li // cardinality: min 1 73*ba677afaSXin Li lic, err := parser.getAnyLicenseFromNode(subTriple.Object) 74*ba677afaSXin Li if err != nil { 75*ba677afaSXin Li return nil, fmt.Errorf("error parsing licenseInfoInFile: %v", err) 76*ba677afaSXin Li } 77*ba677afaSXin Li file.LicenseInfoInFiles = append(file.LicenseInfoInFiles, lic.ToLicenseString()) 78*ba677afaSXin Li case SPDX_LICENSE_COMMENTS: // 4.7 79*ba677afaSXin Li // cardinality: max 1 80*ba677afaSXin Li file.LicenseComments = subTriple.Object.ID 81*ba677afaSXin Li // TODO: allow copyright text to be of type NOASSERTION 82*ba677afaSXin Li case SPDX_COPYRIGHT_TEXT: // 4.8 83*ba677afaSXin Li // cardinality: exactly 1 84*ba677afaSXin Li file.FileCopyrightText = subTriple.Object.ID 85*ba677afaSXin Li case SPDX_LICENSE_INFO_FROM_FILES: 86*ba677afaSXin Li // TODO: implement it. It is not defined in the tools-golang model. 87*ba677afaSXin Li // deprecated artifactOf (see sections 4.9, 4.10, 4.11) 88*ba677afaSXin Li case SPDX_ARTIFACT_OF: 89*ba677afaSXin Li // cardinality: min 0 90*ba677afaSXin Li var artifactOf *v2_3.ArtifactOfProject 91*ba677afaSXin Li artifactOf, err = parser.getArtifactFromNode(subTriple.Object) 92*ba677afaSXin Li file.ArtifactOfProjects = append(file.ArtifactOfProjects, artifactOf) 93*ba677afaSXin Li case RDFS_COMMENT: // 4.12 94*ba677afaSXin Li // cardinality: max 1 95*ba677afaSXin Li file.FileComment = subTriple.Object.ID 96*ba677afaSXin Li case SPDX_NOTICE_TEXT: // 4.13 97*ba677afaSXin Li // cardinality: max 1 98*ba677afaSXin Li file.FileNotice = getNoticeTextFromNode(subTriple.Object) 99*ba677afaSXin Li case SPDX_FILE_CONTRIBUTOR: // 4.14 100*ba677afaSXin Li // cardinality: min 0 101*ba677afaSXin Li file.FileContributors = append(file.FileContributors, subTriple.Object.ID) 102*ba677afaSXin Li case SPDX_FILE_DEPENDENCY: 103*ba677afaSXin Li // cardinality: min 0 104*ba677afaSXin Li newFile, err := parser.getFileFromNode(subTriple.Object) 105*ba677afaSXin Li if err != nil { 106*ba677afaSXin Li return nil, fmt.Errorf("error setting a file dependency in a file: %v", err) 107*ba677afaSXin Li } 108*ba677afaSXin Li file.FileDependencies = append(file.FileDependencies, string(newFile.FileSPDXIdentifier)) 109*ba677afaSXin Li case SPDX_ATTRIBUTION_TEXT: 110*ba677afaSXin Li // cardinality: min 0 111*ba677afaSXin Li file.FileAttributionTexts = append(file.FileAttributionTexts, subTriple.Object.ID) 112*ba677afaSXin Li case SPDX_ANNOTATION: 113*ba677afaSXin Li // cardinality: min 0 114*ba677afaSXin Li err = parser.parseAnnotationFromNode(subTriple.Object) 115*ba677afaSXin Li case SPDX_RELATIONSHIP: 116*ba677afaSXin Li // cardinality: min 0 117*ba677afaSXin Li err = parser.parseRelationship(subTriple) 118*ba677afaSXin Li default: 119*ba677afaSXin Li return nil, fmt.Errorf("unknown triple predicate id %s", subTriple.Predicate.ID) 120*ba677afaSXin Li } 121*ba677afaSXin Li if err != nil { 122*ba677afaSXin Li return nil, err 123*ba677afaSXin Li } 124*ba677afaSXin Li } 125*ba677afaSXin Li parser.files[file.FileSPDXIdentifier] = file 126*ba677afaSXin Li return file, nil 127*ba677afaSXin Li} 128*ba677afaSXin Li 129*ba677afaSXin Lifunc (parser *rdfParser2_3) setFileChecksumFromNode(file *v2_3.File, checksumNode *gordfParser.Node) error { 130*ba677afaSXin Li checksumAlgorithm, checksumValue, err := parser.getChecksumFromNode(checksumNode) 131*ba677afaSXin Li if err != nil { 132*ba677afaSXin Li return fmt.Errorf("error parsing checksumNode of a file: %v", err) 133*ba677afaSXin Li } 134*ba677afaSXin Li if file.Checksums == nil { 135*ba677afaSXin Li file.Checksums = []common.Checksum{} 136*ba677afaSXin Li } 137*ba677afaSXin Li switch checksumAlgorithm { 138*ba677afaSXin Li case common.SHA1, 139*ba677afaSXin Li common.SHA224, 140*ba677afaSXin Li common.SHA256, 141*ba677afaSXin Li common.SHA384, 142*ba677afaSXin Li common.SHA512, 143*ba677afaSXin Li common.MD2, 144*ba677afaSXin Li common.MD4, 145*ba677afaSXin Li common.MD5, 146*ba677afaSXin Li common.MD6, 147*ba677afaSXin Li common.SHA3_256, 148*ba677afaSXin Li common.SHA3_384, 149*ba677afaSXin Li common.SHA3_512, 150*ba677afaSXin Li common.BLAKE2b_256, 151*ba677afaSXin Li common.BLAKE2b_384, 152*ba677afaSXin Li common.BLAKE2b_512, 153*ba677afaSXin Li common.BLAKE3, 154*ba677afaSXin Li common.ADLER32: 155*ba677afaSXin Li file.Checksums = append(file.Checksums, common.Checksum{Algorithm: checksumAlgorithm, Value: checksumValue}) 156*ba677afaSXin Li case "": 157*ba677afaSXin Li return fmt.Errorf("empty checksum algorithm and value") 158*ba677afaSXin Li default: 159*ba677afaSXin Li return fmt.Errorf("unknown checksumAlgorithm %s for a file", checksumAlgorithm) 160*ba677afaSXin Li } 161*ba677afaSXin Li return nil 162*ba677afaSXin Li} 163*ba677afaSXin Li 164*ba677afaSXin Lifunc (parser *rdfParser2_3) getArtifactFromNode(node *gordfParser.Node) (*v2_3.ArtifactOfProject, error) { 165*ba677afaSXin Li artifactOf := &v2_3.ArtifactOfProject{} 166*ba677afaSXin Li // setting artifactOfProjectURI attribute (which is optional) 167*ba677afaSXin Li if node.NodeType == gordfParser.IRI { 168*ba677afaSXin Li artifactOf.URI = node.ID 169*ba677afaSXin Li } 170*ba677afaSXin Li // parsing rest triples and attributes of the artifact. 171*ba677afaSXin Li for _, triple := range parser.nodeToTriples(node) { 172*ba677afaSXin Li switch triple.Predicate.ID { 173*ba677afaSXin Li case RDF_TYPE: 174*ba677afaSXin Li case DOAP_HOMEPAGE: 175*ba677afaSXin Li artifactOf.HomePage = triple.Object.ID 176*ba677afaSXin Li case DOAP_NAME: 177*ba677afaSXin Li artifactOf.Name = triple.Object.ID 178*ba677afaSXin Li default: 179*ba677afaSXin Li return nil, fmt.Errorf("error parsing artifactOf predicate %s", triple.Predicate.ID) 180*ba677afaSXin Li } 181*ba677afaSXin Li } 182*ba677afaSXin Li return artifactOf, nil 183*ba677afaSXin Li} 184*ba677afaSXin Li 185*ba677afaSXin Li// TODO: check if the filetype is valid. 186*ba677afaSXin Lifunc (parser *rdfParser2_3) getFileTypeFromUri(uri string) (string, error) { 187*ba677afaSXin Li // fileType is given as a uri. for example: http://spdx.org/rdf/terms#fileType_text 188*ba677afaSXin Li lastPart := getLastPartOfURI(uri) 189*ba677afaSXin Li if !strings.HasPrefix(lastPart, "fileType_") { 190*ba677afaSXin Li return "", fmt.Errorf("fileType Uri must begin with fileTYpe_. found: %s", lastPart) 191*ba677afaSXin Li } 192*ba677afaSXin Li return strings.TrimPrefix(lastPart, "fileType_"), nil 193*ba677afaSXin Li} 194*ba677afaSXin Li 195*ba677afaSXin Li// populates parser.doc.Files by a list of files which are not 196*ba677afaSXin Li// associated with a package by the hasFile attribute 197*ba677afaSXin Li// assumes: all the packages are already parsed. 198*ba677afaSXin Lifunc (parser *rdfParser2_3) setUnpackagedFiles() { 199*ba677afaSXin Li for fileID := range parser.files { 200*ba677afaSXin Li if !parser.assocWithPackage[fileID] { 201*ba677afaSXin Li parser.doc.Files = append(parser.doc.Files, parser.files[fileID]) 202*ba677afaSXin Li } 203*ba677afaSXin Li } 204*ba677afaSXin Li} 205*ba677afaSXin Li 206*ba677afaSXin Lifunc setFileIdentifier(idURI string, file *v2_3.File) (err error) { 207*ba677afaSXin Li idURI = strings.TrimSpace(idURI) 208*ba677afaSXin Li uriFragment := getLastPartOfURI(idURI) 209*ba677afaSXin Li file.FileSPDXIdentifier, err = ExtractElementID(uriFragment) 210*ba677afaSXin Li if err != nil { 211*ba677afaSXin Li return fmt.Errorf("error setting file identifier: %s", err) 212*ba677afaSXin Li } 213*ba677afaSXin Li return nil 214*ba677afaSXin Li} 215*ba677afaSXin Li 216*ba677afaSXin Lifunc getNoticeTextFromNode(node *gordfParser.Node) string { 217*ba677afaSXin Li switch node.ID { 218*ba677afaSXin Li case SPDX_NOASSERTION_CAPS, SPDX_NOASSERTION_SMALL: 219*ba677afaSXin Li return "NOASSERTION" 220*ba677afaSXin Li default: 221*ba677afaSXin Li return node.ID 222*ba677afaSXin Li } 223*ba677afaSXin Li} 224