1#!/usr/bin/env python3
2#
3# Copyright (C) 2023 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""
18Serialize objects defined in package sbom_data to SPDX format: tagvalue, JSON.
19"""
20
21import json
22import sbom_data
23
# Value written for the SPDX version field of every serialized document.
SPDX_VER = 'SPDX-2.3'
# Value written for the data license field of every serialized document.
DATA_LIC = 'CC0-1.0'
26
27
class Tags:
  """Tag names for the SPDX tag-value format.

  Each constant is the text that precedes the ': ' separator on a tag-value line.
  """

  # --- Document-level (common) tags ---
  SPDXID = 'SPDXID'
  SPDX_VERSION = 'SPDXVersion'
  DATA_LICENSE = 'DataLicense'
  DOCUMENT_NAME = 'DocumentName'
  DOCUMENT_NAMESPACE = 'DocumentNamespace'
  CREATED = 'Created'
  CREATOR = 'Creator'
  EXTERNAL_DOCUMENT_REF = 'ExternalDocumentRef'

  # --- Package tags ---
  PACKAGE_NAME = 'PackageName'
  PACKAGE_DOWNLOAD_LOCATION = 'PackageDownloadLocation'
  PACKAGE_VERSION = 'PackageVersion'
  PACKAGE_SUPPLIER = 'PackageSupplier'
  FILES_ANALYZED = 'FilesAnalyzed'
  PACKAGE_VERIFICATION_CODE = 'PackageVerificationCode'
  PACKAGE_EXTERNAL_REF = 'ExternalRef'
  # Package licensing tags
  PACKAGE_LICENSE_CONCLUDED = 'PackageLicenseConcluded'
  PACKAGE_LICENSE_INFO_FROM_FILES = 'PackageLicenseInfoFromFiles'
  PACKAGE_LICENSE_DECLARED = 'PackageLicenseDeclared'
  PACKAGE_LICENSE_COMMENTS = 'PackageLicenseComments'

  # --- File tags ---
  FILE_NAME = 'FileName'
  FILE_CHECKSUM = 'FileChecksum'
  # File licensing tags
  FILE_LICENSE_CONCLUDED = 'LicenseConcluded'
  FILE_LICENSE_INFO_IN_FILE = 'LicenseInfoInFile'
  FILE_LICENSE_COMMENTS = 'LicenseComments'
  FILE_COPYRIGHT_TEXT = 'FileCopyrightText'
  FILE_NOTICE = 'FileNotice'
  FILE_ATTRIBUTION_TEXT = 'FileAttributionText'

  # --- Relationship tag ---
  RELATIONSHIP = 'Relationship'

  # --- Extracted license tags ---
  LICENSE_ID = 'LicenseID'
  LICENSE_NAME = 'LicenseName'
  LICENSE_EXTRACTED_TEXT = 'ExtractedText'
71
72
class TagValueWriter:
  """Serializes an SBOM document into SPDX tag-value lines.

  The marshal_* methods return lines without line terminators; an empty string in a returned list stands for a blank
  separator line. write() joins all lines with newline characters and writes the result to a file object.
  """

  @staticmethod
  def marshal_doc_headers(sbom_doc):
    """Returns the document header lines, terminated by a blank line.

    Reads id, name, namespace, creators, created and external_refs from sbom_doc. One Creator line is emitted per
    creator and one ExternalDocumentRef line per external reference.
    """
    headers = [
      f'{Tags.SPDX_VERSION}: {SPDX_VER}',
      f'{Tags.DATA_LICENSE}: {DATA_LIC}',
      f'{Tags.SPDXID}: {sbom_doc.id}',
      f'{Tags.DOCUMENT_NAME}: {sbom_doc.name}',
      f'{Tags.DOCUMENT_NAMESPACE}: {sbom_doc.namespace}',
    ]
    headers.extend(f'{Tags.CREATOR}: {creator}' for creator in sbom_doc.creators)
    headers.append(f'{Tags.CREATED}: {sbom_doc.created}')
    headers.extend(
      f'{Tags.EXTERNAL_DOCUMENT_REF}: {doc_ref.id} {doc_ref.uri} {doc_ref.checksum}'
      for doc_ref in sbom_doc.external_refs)
    headers.append('')
    return headers

  @staticmethod
  def marshal_package(sbom_doc, package, fragment):
    """Returns the lines for one package, followed by the lines of the files it contains.

    Args:
      sbom_doc: the document the package belongs to; its files, id and describes are read.
      package: the package to serialize.
      fragment: when true, the DESCRIBES relationship is never emitted.

    Returns:
      A list of tag-value lines. A falsy download location and an empty declared license list are both written as
      sbom_data.VALUE_NOASSERTION; multiple declared license ids are joined with ' OR '.
    """
    download_location = package.download_location or sbom_data.VALUE_NOASSERTION
    tagvalues = [
      f'{Tags.PACKAGE_NAME}: {package.name}',
      f'{Tags.SPDXID}: {package.id}',
      f'{Tags.PACKAGE_DOWNLOAD_LOCATION}: {download_location}',
      f'{Tags.FILES_ANALYZED}: {str(package.files_analyzed).lower()}',
    ]
    if package.version:
      tagvalues.append(f'{Tags.PACKAGE_VERSION}: {package.version}')
    if package.supplier:
      tagvalues.append(f'{Tags.PACKAGE_SUPPLIER}: {package.supplier}')

    declared_license = sbom_data.VALUE_NOASSERTION
    if package.declared_license_ids:
      declared_license = ' OR '.join(package.declared_license_ids)
    tagvalues.append(f'{Tags.PACKAGE_LICENSE_DECLARED}: {declared_license}')

    if package.verification_code:
      tagvalues.append(f'{Tags.PACKAGE_VERIFICATION_CODE}: {package.verification_code}')
    if package.external_refs:
      for external_ref in package.external_refs:
        tagvalues.append(
          f'{Tags.PACKAGE_EXTERNAL_REF}: {external_ref.category} {external_ref.type} {external_ref.locator}')

    tagvalues.append('')

    if package.id == sbom_doc.describes and not fragment:
      # Fragment is not a full SBOM document so the relationship DESCRIBES is not applicable.
      tagvalues.append(
          f'{Tags.RELATIONSHIP}: {sbom_doc.id} {sbom_data.RelationshipType.DESCRIBES} {sbom_doc.describes}')
      tagvalues.append('')

    # Build the lookup set once so that each membership test below is O(1) instead of a scan of the id list.
    # Files are still emitted in the order of sbom_doc.files.
    package_file_ids = set(package.file_ids or ())
    for sbom_file in sbom_doc.files:
      if sbom_file.id in package_file_ids:
        tagvalues += TagValueWriter.marshal_file(sbom_file)

    return tagvalues

  @staticmethod
  def marshal_packages(sbom_doc, fragment):
    """Returns the lines for all packages plus the relationships already emitted inline.

    Returns:
      A tuple (tagvalues, marshaled_relationships). marshaled_relationships lists the VARIANT_OF relationships that
      were written next to their packages, so that callers can avoid emitting them a second time.
    """
    tagvalues = []
    marshaled_relationships = []
    i = 0
    packages = sbom_doc.packages
    while i < len(packages):
      if (i + 1 < len(packages)
          and packages[i].id.startswith('SPDXRef-SOURCE-')
          and packages[i + 1].id.startswith('SPDXRef-UPSTREAM-')):
        # Output SOURCE, UPSTREAM packages and their VARIANT_OF relationship together, so they are close to each other
        # in SBOMs in tagvalue format.
        source_package = packages[i]
        upstream_package = packages[i + 1]
        tagvalues += TagValueWriter.marshal_package(sbom_doc, source_package, fragment)
        tagvalues += TagValueWriter.marshal_package(sbom_doc, upstream_package, fragment)
        rel = next((r for r in sbom_doc.relationships if
                    r.id1 == source_package.id and
                    r.id2 == upstream_package.id and
                    r.relationship == sbom_data.RelationshipType.VARIANT_OF), None)
        if rel:
          marshaled_relationships.append(rel)
          tagvalues.append(TagValueWriter.marshal_relationship(rel))
          tagvalues.append('')

        # Both packages of the pair have been consumed.
        i += 2
      else:
        tagvalues += TagValueWriter.marshal_package(sbom_doc, packages[i], fragment)
        i += 1

    return tagvalues, marshaled_relationships

  @staticmethod
  def marshal_file(file):
    """Returns the lines for one file, terminated by a blank line.

    An empty concluded license list is written as sbom_data.VALUE_NOASSERTION; multiple ids are joined with ' OR '.
    """
    concluded_license = sbom_data.VALUE_NOASSERTION
    if file.concluded_license_ids:
      concluded_license = ' OR '.join(file.concluded_license_ids)
    return [
      f'{Tags.FILE_NAME}: {file.name}',
      f'{Tags.SPDXID}: {file.id}',
      f'{Tags.FILE_CHECKSUM}: {file.checksum}',
      f'{Tags.FILE_LICENSE_CONCLUDED}: {concluded_license}',
      '',
    ]

  @staticmethod
  def marshal_files(sbom_doc, fragment):
    """Returns the lines for the files that are not listed in any package's file_ids.

    Files that belong to a package are skipped here because marshal_package() emits them right after their package.
    """
    tagvalues = []
    # A set gives O(1) membership tests; only membership matters here, not order or multiplicity.
    files_in_packages = set()
    for package in sbom_doc.packages:
      files_in_packages.update(package.file_ids or ())
    for sbom_file in sbom_doc.files:
      if sbom_file.id in files_in_packages:
        continue
      tagvalues += TagValueWriter.marshal_file(sbom_file)
      if sbom_file.id == sbom_doc.describes and not fragment:
        # Fragment is not a full SBOM document so the relationship DESCRIBES is not applicable.
        tagvalues.append(
            f'{Tags.RELATIONSHIP}: {sbom_doc.id} {sbom_data.RelationshipType.DESCRIBES} {sbom_doc.describes}')
        tagvalues.append('')
    return tagvalues

  @staticmethod
  def marshal_relationship(rel):
    """Returns the single Relationship line for rel: '<id1> <relationship> <id2>'."""
    return f'{Tags.RELATIONSHIP}: {rel.id1} {rel.relationship} {rel.id2}'

  @staticmethod
  def marshal_relationships(sbom_doc, marshaled_rels):
    """Returns the lines for the relationships not yet emitted, terminated by a blank line.

    Args:
      sbom_doc: the document whose relationships are serialized, sorted by the concatenation id2 + id1.
      marshaled_rels: relationships that were already written; any relationship with the same id1, id2 and
        relationship type as one of them is skipped.
    """
    # Precompute the keys of the already emitted relationships so each check below is a set lookup.
    already_marshaled = {(r.id1, r.id2, r.relationship) for r in marshaled_rels}
    tagvalues = []
    sorted_rels = sorted(sbom_doc.relationships, key=lambda r: r.id2 + r.id1)
    for rel in sorted_rels:
      if (rel.id1, rel.id2, rel.relationship) in already_marshaled:
        continue
      tagvalues.append(TagValueWriter.marshal_relationship(rel))
    tagvalues.append('')
    return tagvalues

  @staticmethod
  def marshal_license(license):
    """Returns the id, name and extracted text lines for one license; the text is wrapped in <text>...</text>."""
    return [
      f'{Tags.LICENSE_ID}: {license.id}',
      f'{Tags.LICENSE_NAME}: {license.name}',
      f'{Tags.LICENSE_EXTRACTED_TEXT}: <text>{license.text}</text>',
    ]

  @staticmethod
  def marshal_licenses(sbom_doc):
    """Returns the lines for all licenses of sbom_doc, each license followed by a blank line."""
    tagvalues = []
    for license_info in sbom_doc.licenses:
      tagvalues += TagValueWriter.marshal_license(license_info)
      tagvalues.append('')
    return tagvalues

  @staticmethod
  def write(sbom_doc, file, fragment=False):
    """Writes sbom_doc to file in tag-value format.

    Args:
      sbom_doc: the SBOM document to serialize.
      file: an object with a write() method accepting a string.
      fragment: when true, the document headers and the DESCRIBES relationship are omitted.

    Sections are written in this order: headers, files outside packages, packages (with their files), remaining
    relationships, licenses.
    """
    content = []
    if not fragment:
      content += TagValueWriter.marshal_doc_headers(sbom_doc)
    content += TagValueWriter.marshal_files(sbom_doc, fragment)
    tagvalues, marshaled_relationships = TagValueWriter.marshal_packages(sbom_doc, fragment)
    content += tagvalues
    content += TagValueWriter.marshal_relationships(sbom_doc, marshaled_relationships)
    content += TagValueWriter.marshal_licenses(sbom_doc)
    file.write('\n'.join(content))
238
239
class PropNames:
  """Property names for the SPDX JSON format.

  Each constant is a key of a JSON object in the serialized document.
  """

  # --- Document-level (common) properties ---
  SPDXID = 'SPDXID'
  SPDX_VERSION = 'spdxVersion'
  DATA_LICENSE = 'dataLicense'
  NAME = 'name'
  DOCUMENT_NAMESPACE = 'documentNamespace'
  CREATION_INFO = 'creationInfo'
  CREATORS = 'creators'
  CREATED = 'created'
  EXTERNAL_DOCUMENT_REF = 'externalDocumentRefs'
  DOCUMENT_DESCRIBES = 'documentDescribes'
  EXTERNAL_DOCUMENT_ID = 'externalDocumentId'
  EXTERNAL_DOCUMENT_URI = 'spdxDocument'
  EXTERNAL_DOCUMENT_CHECKSUM = 'checksum'
  ALGORITHM = 'algorithm'
  CHECKSUM_VALUE = 'checksumValue'

  # --- Package properties ---
  PACKAGES = 'packages'
  PACKAGE_DOWNLOAD_LOCATION = 'downloadLocation'
  PACKAGE_VERSION = 'versionInfo'
  PACKAGE_SUPPLIER = 'supplier'
  FILES_ANALYZED = 'filesAnalyzed'
  PACKAGE_VERIFICATION_CODE = 'packageVerificationCode'
  PACKAGE_VERIFICATION_CODE_VALUE = 'packageVerificationCodeValue'
  PACKAGE_EXTERNAL_REFS = 'externalRefs'
  PACKAGE_EXTERNAL_REF_CATEGORY = 'referenceCategory'
  PACKAGE_EXTERNAL_REF_TYPE = 'referenceType'
  PACKAGE_EXTERNAL_REF_LOCATOR = 'referenceLocator'
  PACKAGE_HAS_FILES = 'hasFiles'
  PACKAGE_LICENSE_DECLARED = 'licenseDeclared'

  # --- File properties ---
  FILES = 'files'
  FILE_NAME = 'fileName'
  FILE_CHECKSUMS = 'checksums'
  FILE_LICENSE_CONCLUDED = 'licenseConcluded'

  # --- Relationship properties ---
  RELATIONSHIPS = 'relationships'
  REL_ELEMENT_ID = 'spdxElementId'
  REL_RELATED_ELEMENT_ID = 'relatedSpdxElement'
  REL_TYPE = 'relationshipType'

  # --- Extracted license properties ---
  LICENSES = 'hasExtractedLicensingInfos'
  LICENSE_ID = 'licenseId'
  # Same key text as NAME; kept as a separate constant for the license objects.
  LICENSE_NAME = 'name'
  LICENSE_EXTRACTED_TEXT = 'extractedText'
290
291
class JSONWriter:
  """Serializes an SBOM document into an SPDX JSON document.

  Each marshal_* method returns a dict holding one section of the document; write() merges the sections in order and
  dumps the result as JSON with an indent of 4.
  """

  @staticmethod
  def _marshal_checksum(checksum):
    """Converts a checksum string of the form '<algorithm>: <value>' to its JSON object.

    The string is split on ': '; the first two parts are used, and a string without that separator raises IndexError.
    """
    parts = checksum.split(': ')
    return {
      PropNames.ALGORITHM: parts[0],
      PropNames.CHECKSUM_VALUE: parts[1]
    }

  @staticmethod
  def marshal_doc_headers(sbom_doc):
    """Returns the document-level properties of sbom_doc as a dict.

    The external document references property is only present when sbom_doc has at least one external reference.
    The document-describes property is always a one-element list holding sbom_doc.describes.
    """
    headers = {
      PropNames.SPDX_VERSION: SPDX_VER,
      PropNames.DATA_LICENSE: DATA_LIC,
      PropNames.SPDXID: sbom_doc.id,
      PropNames.NAME: sbom_doc.name,
      PropNames.DOCUMENT_NAMESPACE: sbom_doc.namespace,
      PropNames.CREATION_INFO: {
        PropNames.CREATORS: list(sbom_doc.creators),
        PropNames.CREATED: sbom_doc.created,
      },
    }

    document_refs = []
    for doc_ref in sbom_doc.external_refs:
      checksum = JSONWriter._marshal_checksum(doc_ref.checksum)
      document_refs.append({
        PropNames.EXTERNAL_DOCUMENT_ID: f'{doc_ref.id}',
        PropNames.EXTERNAL_DOCUMENT_URI: doc_ref.uri,
        PropNames.EXTERNAL_DOCUMENT_CHECKSUM: checksum,
      })
    if document_refs:
      headers[PropNames.EXTERNAL_DOCUMENT_REF] = document_refs

    headers[PropNames.DOCUMENT_DESCRIBES] = [sbom_doc.describes]
    return headers

  @staticmethod
  def marshal_packages(sbom_doc):
    """Returns {'packages': [...]} with one object per package of sbom_doc.

    Version, supplier, verification code, external references and contained file ids are only written when they are
    truthy. A falsy download location and an empty declared license list are written as sbom_data.VALUE_NOASSERTION.
    """
    marshaled = []
    for p in sbom_doc.packages:
      entry = {
        PropNames.NAME: p.name,
        PropNames.SPDXID: p.id,
        PropNames.PACKAGE_DOWNLOAD_LOCATION: p.download_location or sbom_data.VALUE_NOASSERTION,
        PropNames.FILES_ANALYZED: p.files_analyzed,
      }
      if p.version:
        entry[PropNames.PACKAGE_VERSION] = p.version
      if p.supplier:
        entry[PropNames.PACKAGE_SUPPLIER] = p.supplier

      if p.declared_license_ids:
        entry[PropNames.PACKAGE_LICENSE_DECLARED] = ' OR '.join(p.declared_license_ids)
      else:
        entry[PropNames.PACKAGE_LICENSE_DECLARED] = sbom_data.VALUE_NOASSERTION

      if p.verification_code:
        entry[PropNames.PACKAGE_VERIFICATION_CODE] = {
          PropNames.PACKAGE_VERIFICATION_CODE_VALUE: p.verification_code
        }
      if p.external_refs:
        entry[PropNames.PACKAGE_EXTERNAL_REFS] = [
          {
            PropNames.PACKAGE_EXTERNAL_REF_CATEGORY: ref.category,
            PropNames.PACKAGE_EXTERNAL_REF_TYPE: ref.type,
            PropNames.PACKAGE_EXTERNAL_REF_LOCATOR: ref.locator,
          }
          for ref in p.external_refs
        ]
      if p.file_ids:
        # Copy the ids so the marshaled document does not share the package's own list.
        entry[PropNames.PACKAGE_HAS_FILES] = list(p.file_ids)

      marshaled.append(entry)

    return {PropNames.PACKAGES: marshaled}

  @staticmethod
  def marshal_files(sbom_doc):
    """Returns {'files': [...]} with one object per file of sbom_doc.

    Each file gets a one-element checksum list parsed from its checksum string. An empty concluded license list is
    written as sbom_data.VALUE_NOASSERTION; multiple ids are joined with ' OR '.
    """
    marshaled = []
    for f in sbom_doc.files:
      entry = {
        PropNames.FILE_NAME: f.name,
        PropNames.SPDXID: f.id,
      }
      entry[PropNames.FILE_CHECKSUMS] = [JSONWriter._marshal_checksum(f.checksum)]
      if f.concluded_license_ids:
        entry[PropNames.FILE_LICENSE_CONCLUDED] = ' OR '.join(f.concluded_license_ids)
      else:
        entry[PropNames.FILE_LICENSE_CONCLUDED] = sbom_data.VALUE_NOASSERTION
      marshaled.append(entry)
    return {PropNames.FILES: marshaled}

  @staticmethod
  def marshal_relationships(sbom_doc):
    """Returns {'relationships': [...]} sorted by the concatenation relationship + id2 + id1."""
    ordered = sorted(sbom_doc.relationships, key=lambda r: r.relationship + r.id2 + r.id1)
    marshaled = [
      {
        PropNames.REL_ELEMENT_ID: r.id1,
        PropNames.REL_RELATED_ELEMENT_ID: r.id2,
        PropNames.REL_TYPE: r.relationship,
      }
      for r in ordered
    ]
    return {PropNames.RELATIONSHIPS: marshaled}

  @staticmethod
  def marshal_licenses(sbom_doc):
    """Returns the extracted licensing infos of sbom_doc; each license text is wrapped in <text>...</text>."""
    marshaled = [
      {
        PropNames.LICENSE_ID: lic.id,
        PropNames.LICENSE_NAME: lic.name,
        PropNames.LICENSE_EXTRACTED_TEXT: f'<text>{lic.text}</text>'
      }
      for lic in sbom_doc.licenses
    ]
    return {PropNames.LICENSES: marshaled}

  @staticmethod
  def write(sbom_doc, file):
    """Writes sbom_doc to file as a JSON document.

    Args:
      sbom_doc: the SBOM document to serialize.
      file: an object with a write() method accepting a string.
    """
    # The order of this tuple determines the order of the top-level keys in the output.
    marshalers = (
      JSONWriter.marshal_doc_headers,
      JSONWriter.marshal_packages,
      JSONWriter.marshal_files,
      JSONWriter.marshal_relationships,
      JSONWriter.marshal_licenses,
    )
    doc = {}
    for marshal in marshalers:
      doc.update(marshal(sbom_doc))
    file.write(json.dumps(doc, indent=4))
415