@@ -156,6 +156,13 @@ SPDX_RECIPE_SBOM_NAME ?= "${PN}-recipe-sbom"
SPDX_RECIPE_SBOM_NAME[doc] = "The name of output recipe SBoM when using \
create_recipe_sbom"
+SPDX_GIT_PURL_MAPPINGS ??= ""
+SPDX_GIT_PURL_MAPPINGS[doc] = "A space separated list of domain:purl_type \
+ mappings to configure PURL generation for Git source downloads. \
+    For example, 'gitlab.example.com:pkg:gitlab' maps repositories hosted \
+ on gitlab.example.com to the pkg:gitlab PURL type. \
+ github.com is always mapped to pkg:github by default."
+
IMAGE_CLASSES:append = " create-spdx-image-3.0"
SDK_CLASSES += "create-spdx-sdk-3.0"
@@ -34,7 +34,9 @@ def set_timestamp_now(d, o, prop):
delattr(o, prop)
-def add_license_expression(d, objset, license_expression, license_data):
+def add_license_expression(
+ d, objset, license_expression, license_data, search_objsets=[]
+):
simple_license_text = {}
license_text_map = {}
license_ref_idx = 0
@@ -46,14 +48,15 @@ def add_license_expression(d, objset, license_expression, license_data):
if name in simple_license_text:
return simple_license_text[name]
- lic = objset.find_filter(
- oe.spdx30.simplelicensing_SimpleLicensingText,
- name=name,
- )
+ for o in [objset] + search_objsets:
+ lic = o.find_filter(
+ oe.spdx30.simplelicensing_SimpleLicensingText,
+ name=name,
+ )
- if lic is not None:
- simple_license_text[name] = lic
- return lic
+ if lic is not None:
+ simple_license_text[name] = lic
+ return lic
lic = objset.add(
oe.spdx30.simplelicensing_SimpleLicensingText(
@@ -147,37 +150,58 @@ def add_package_files(
ignore_dirs=[],
ignore_top_level_dirs=[],
):
- if os.path.isdir(image_path):
- a, _ = add_package_files(
- d,
- objset,
- image_path,
- lambda file_counter: objset.new_spdxid(
- "imagefile", str(file_counter)
- ),
- lambda filepath: [],
- license_data=None,
- ignore_dirs=[],
- ignore_top_level_dirs=[],
- archive=None,
- )
+ source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
+ if source_date_epoch:
+ source_date_epoch = int(source_date_epoch)
+
+ spdx_files = set()
+
+ file_counter = 1
+ if not os.path.exists(topdir):
+ bb.note(f"Skip {topdir}")
+ return spdx_files, set()
+
+ check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
+ if check_compiled_sources:
+ compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
+ bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
+
+ exclude_patterns = [
+ re.compile(pattern)
+ for pattern in (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split()
+ ]
+ excluded_files = set()
+
+ for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
+ dirs[:] = [directory for directory in dirs if directory not in ignore_dirs]
+ if subdir == str(topdir):
+ dirs[:] = [
+ directory
+ for directory in dirs
+ if directory not in ignore_top_level_dirs
+ ]
+
+ dirs.sort()
+ files.sort()
+ for file in files:
+ filepath = Path(subdir) / file
if filepath.is_symlink() or not filepath.is_file():
continue
filename = str(filepath.relative_to(topdir))
- # Apply file exclusion filtering
- if exclude_patterns:
- if any(p.search(filename) for p in exclude_patterns):
- excluded_files.add(filename)
- continue
+ if exclude_patterns and any(
+ pattern.search(filename) for pattern in exclude_patterns
+ ):
+ excluded_files.add(filename)
+ continue
file_purposes = get_purposes(filepath)
- # Check if file is compiled
- if check_compiled_sources:
- if not oe.spdx_common.is_compiled_source(filename, compiled_sources, types):
- continue
+ if check_compiled_sources and not oe.spdx_common.is_compiled_source(
+ filename, compiled_sources, types
+ ):
+ continue
spdx_file = objset.new_file(
get_spdxid(file_counter),
@@ -300,17 +324,14 @@ def get_package_sources_from_debug(
return dep_source_files
-def collect_dep_objsets(d, build):
- deps = oe.spdx_common.get_spdx_deps(d)
-
+def collect_dep_objsets(d, direct_deps, subdir, fn_prefix, obj_type, **attr_filter):
dep_objsets = []
- dep_builds = set()
+ dep_objs = set()
- dep_build_spdxids = set()
- for dep in deps:
+ for dep in direct_deps:
bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn))
- dep_build, dep_objset = oe.sbom30.find_root_obj_in_jsonld(
- d, "recipes", "recipe-" + dep.pn, oe.spdx30.build_Build
+ dep_obj, dep_objset = oe.sbom30.find_root_obj_in_jsonld(
+ d, subdir, fn_prefix + dep.pn, obj_type, **attr_filter
)
# If the dependency is part of the taskhash, return it to be linked
# against. Otherwise, it cannot be linked against because this recipe
@@ -318,10 +339,10 @@ def collect_dep_objsets(d, build):
if dep.in_taskhash:
dep_objsets.append(dep_objset)
- # The build _can_ be linked against (by alias)
- dep_builds.add(dep_build)
+ # The object _can_ be linked against (by alias)
+ dep_objs.add(dep_obj)
- return dep_objsets, dep_builds
+ return dep_objsets, dep_objs
def index_sources_by_hash(sources, dest):
@@ -585,6 +606,201 @@ def set_purposes(d, element, *var_names, force_purposes=[]):
]
+def set_purls(spdx_package, purls):
+ if purls:
+ spdx_package.software_packageUrl = purls[0]
+
+ for p in sorted(set(purls)):
+ spdx_package.externalIdentifier.append(
+ oe.spdx30.ExternalIdentifier(
+ externalIdentifierType=oe.spdx30.ExternalIdentifierType.packageUrl,
+ identifier=p,
+ )
+ )
+
+
+def get_is_native(d):
+ return bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d)
+
+
+def create_recipe_spdx(d):
+ deploydir = Path(d.getVar("SPDXRECIPEDEPLOY"))
+ pn = d.getVar("PN")
+
+ license_data = oe.spdx_common.load_spdx_license_data(d)
+
+ include_vex = d.getVar("SPDX_INCLUDE_VEX")
+    if include_vex not in ("none", "current", "all"):
+ bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'")
+
+ recipe_objset = oe.sbom30.ObjectSet.new_objset(d, "static-" + pn)
+
+ recipe = recipe_objset.add_root(
+ oe.spdx30.software_Package(
+ _id=recipe_objset.new_spdxid("recipe", pn),
+ creationInfo=recipe_objset.doc.creationInfo,
+ name=d.getVar("PN"),
+ software_packageVersion=d.getVar("PV"),
+ software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.specification,
+ software_sourceInfo=json.dumps(
+ {
+ "FILENAME": os.path.basename(d.getVar("FILE")),
+ "FILE_LAYERNAME": d.getVar("FILE_LAYERNAME"),
+ },
+ separators=(",", ":"),
+ ),
+ )
+ )
+
+ if get_is_native(d):
+ ext = oe.sbom30.OERecipeExtension()
+ ext.is_native = True
+ recipe.extension.append(ext)
+
+ set_purls(recipe, (d.getVar("SPDX_PACKAGE_URLS") or "").split())
+
+ if val := d.getVar("HOMEPAGE"):
+ recipe.software_homePage = val
+
+ if val := d.getVar("SUMMARY"):
+ recipe.summary = val
+
+ if val := d.getVar("DESCRIPTION"):
+ recipe.description = val
+
+ for cpe_id in oe.cve_check.get_cpe_ids(
+ d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION")
+ ):
+ recipe.externalIdentifier.append(
+ oe.spdx30.ExternalIdentifier(
+ externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23,
+ identifier=cpe_id,
+ )
+ )
+
+ direct_deps = oe.spdx_common.collect_direct_deps(d, "do_create_recipe_spdx")
+
+ dep_objsets, dep_recipes = collect_dep_objsets(
+ d, direct_deps, "static", "static-", oe.spdx30.software_Package
+ )
+
+ if dep_recipes:
+ recipe_objset.new_scoped_relationship(
+ [recipe],
+ oe.spdx30.RelationshipType.dependsOn,
+ oe.spdx30.LifecycleScopeType.build,
+ sorted(oe.sbom30.get_element_link_id(dep) for dep in dep_recipes),
+ )
+
+ cve_by_status = {}
+ if include_vex != "none":
+ patched_cves = oe.cve_check.get_patched_cves(d)
+ for cve, patched_cve in patched_cves.items():
+ mapping = patched_cve["abbrev-status"]
+ detail = patched_cve["status"]
+ description = patched_cve.get("justification", None)
+ resources = patched_cve.get("resource", [])
+
+ if include_vex != "all" and detail in (
+ "fixed-version",
+ "cpe-stable-backport",
+ ):
+ bb.debug(1, "Skipping %s since it is already fixed upstream" % cve)
+ continue
+
+ spdx_cve = recipe_objset.new_cve_vuln(cve)
+
+ cve_by_status.setdefault(mapping, {})[cve] = (
+ spdx_cve,
+ detail,
+ description,
+ resources,
+ )
+
+ all_cves = set()
+ for status, cves in cve_by_status.items():
+ for cve, items in cves.items():
+ spdx_cve, detail, description, resources = items
+ spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve)
+
+            all_cves.add(spdx_cve_id)
+
+ if status == "Patched":
+ spdx_vex = recipe_objset.new_vex_patched_relationship(
+ [spdx_cve_id], [recipe]
+ )
+ patches = []
+ for idx, filepath in enumerate(resources):
+ patches.append(
+ recipe_objset.new_file(
+ recipe_objset.new_spdxid(
+ "patch", str(idx), os.path.basename(filepath)
+ ),
+ os.path.basename(filepath),
+ filepath,
+ purposes=[oe.spdx30.software_SoftwarePurpose.patch],
+ hashfile=os.path.isfile(filepath),
+ )
+ )
+
+ if patches:
+ recipe_objset.new_scoped_relationship(
+ spdx_vex,
+ oe.spdx30.RelationshipType.patchedBy,
+ oe.spdx30.LifecycleScopeType.build,
+ patches,
+ )
+
+ elif status == "Unpatched":
+ recipe_objset.new_vex_unpatched_relationship([spdx_cve_id], [recipe])
+ elif status == "Ignored":
+ spdx_vex = recipe_objset.new_vex_ignored_relationship(
+ [spdx_cve_id],
+ [recipe],
+ impact_statement=description,
+ )
+
+ vex_just_type = d.getVarFlag("CVE_CHECK_VEX_JUSTIFICATION", detail)
+ if vex_just_type:
+ if (
+ vex_just_type
+ not in oe.spdx30.security_VexJustificationType.NAMED_INDIVIDUALS
+ ):
+ bb.fatal(
+ f"Unknown vex justification '{vex_just_type}', detail '{detail}', for ignored {cve}"
+ )
+
+ for v in spdx_vex:
+ v.security_justificationType = (
+ oe.spdx30.security_VexJustificationType.NAMED_INDIVIDUALS[
+ vex_just_type
+ ]
+ )
+
+ elif status == "Unknown":
+ bb.note(f"Skipping {cve} with status 'Unknown'")
+ else:
+ bb.fatal(f"Unknown {cve} status '{status}'")
+
+ if all_cves:
+ recipe_objset.new_relationship(
+ [recipe],
+ oe.spdx30.RelationshipType.hasAssociatedVulnerability,
+ sorted(list(all_cves)),
+ )
+
+ oe.sbom30.write_recipe_jsonld_doc(d, recipe_objset, "static", deploydir)
+
+
+def load_recipe_spdx(d):
+ return oe.sbom30.find_root_obj_in_jsonld(
+ d,
+ "static",
+ "static-" + d.getVar("PN"),
+ oe.spdx30.software_Package,
+ )
+
+
def create_spdx(d):
def set_var_field(var, obj, name, package=None):
val = None
@@ -599,19 +815,15 @@ def create_spdx(d):
license_data = oe.spdx_common.load_spdx_license_data(d)
+ pn = d.getVar("PN")
deploydir = Path(d.getVar("SPDXDEPLOY"))
- deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
spdx_workdir = Path(d.getVar("SPDXWORK"))
include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
- pkg_arch = d.getVar("SSTATE_PKGARCH")
- is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
- "cross", d
- )
- include_vex = d.getVar("SPDX_INCLUDE_VEX")
- if not include_vex in ("none", "current", "all"):
- bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'")
+ is_native = get_is_native(d)
+
+ recipe, recipe_objset = load_recipe_spdx(d)
- build_objset = oe.sbom30.ObjectSet.new_objset(d, "recipe-" + d.getVar("PN"))
+ build_objset = oe.sbom30.ObjectSet.new_objset(d, "build-" + pn)
build = build_objset.new_task_build("recipe", "recipe")
build_objset.set_element_alias(build)
@@ -629,47 +841,13 @@ def create_spdx(d):
build_inputs = set()
- # Add CVEs
- cve_by_status = {}
- if include_vex != "none":
- patched_cves = oe.cve_check.get_patched_cves(d)
- for cve, patched_cve in patched_cves.items():
- decoded_status = {
- "mapping": patched_cve["abbrev-status"],
- "detail": patched_cve["status"],
- "description": patched_cve.get("justification", None)
- }
-
- # If this CVE is fixed upstream, skip it unless all CVEs are
- # specified.
- if (
- include_vex != "all"
- and "detail" in decoded_status
- and decoded_status["detail"]
- in (
- "fixed-version",
- "cpe-stable-backport",
- )
- ):
- bb.debug(1, "Skipping %s since it is already fixed upstream" % cve)
- continue
-
- spdx_cve = build_objset.new_cve_vuln(cve)
- build_objset.set_element_alias(spdx_cve)
-
- cve_by_status.setdefault(decoded_status["mapping"], {})[cve] = (
- spdx_cve,
- decoded_status["detail"],
- decoded_status["description"],
- )
-
cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))
source_files = add_download_files(d, build_objset)
build_inputs |= source_files
recipe_spdx_license = add_license_expression(
- d, build_objset, d.getVar("LICENSE"), license_data
+ d, build_objset, d.getVar("LICENSE"), license_data, [recipe_objset]
)
build_objset.new_relationship(
source_files,
@@ -698,7 +876,11 @@ def create_spdx(d):
build_inputs |= files
index_sources_by_hash(files, dep_sources)
- dep_objsets, dep_builds = collect_dep_objsets(d, build)
+ direct_deps = oe.spdx_common.collect_direct_deps(d, "do_create_spdx")
+
+ dep_objsets, dep_builds = collect_dep_objsets(
+ d, direct_deps, "builds", "build-", oe.spdx30.build_Build
+ )
if dep_builds:
build_objset.new_scoped_relationship(
[build],
@@ -768,16 +950,7 @@ def create_spdx(d):
or ""
).split()
- if purls:
- spdx_package.software_packageUrl = purls[0]
-
- for p in sorted(set(purls)):
- spdx_package.externalIdentifier.append(
- oe.spdx30.ExternalIdentifier(
- externalIdentifierType=oe.spdx30.ExternalIdentifierType.packageUrl,
- identifier=p,
- )
- )
+ set_purls(spdx_package, purls)
pkg_objset.new_scoped_relationship(
[oe.sbom30.get_element_link_id(build)],
@@ -786,6 +959,13 @@ def create_spdx(d):
[spdx_package],
)
+ pkg_objset.new_scoped_relationship(
+ [oe.sbom30.get_element_link_id(recipe)],
+ oe.spdx30.RelationshipType.generates,
+ oe.spdx30.LifecycleScopeType.build,
+ [spdx_package],
+ )
+
for cpe_id in cpe_ids:
spdx_package.externalIdentifier.append(
oe.spdx30.ExternalIdentifier(
@@ -819,7 +999,11 @@ def create_spdx(d):
package_license = d.getVar("LICENSE:%s" % package)
if package_license and package_license != d.getVar("LICENSE"):
package_spdx_license = add_license_expression(
- d, build_objset, package_license, license_data
+ d,
+ build_objset,
+ package_license,
+ license_data,
+ [recipe_objset],
)
else:
package_spdx_license = recipe_spdx_license
@@ -844,59 +1028,6 @@ def create_spdx(d):
[oe.sbom30.get_element_link_id(concluded_spdx_license)],
)
- # NOTE: CVE Elements live in the recipe collection
- all_cves = set()
- for status, cves in cve_by_status.items():
- for cve, items in cves.items():
- spdx_cve, detail, description = items
- spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve)
-
- all_cves.add(spdx_cve_id)
-
- if status == "Patched":
- pkg_objset.new_vex_patched_relationship(
- [spdx_cve_id], [spdx_package]
- )
- elif status == "Unpatched":
- pkg_objset.new_vex_unpatched_relationship(
- [spdx_cve_id], [spdx_package]
- )
- elif status == "Ignored":
- spdx_vex = pkg_objset.new_vex_ignored_relationship(
- [spdx_cve_id],
- [spdx_package],
- impact_statement=description,
- )
-
- vex_just_type = d.getVarFlag(
- "CVE_CHECK_VEX_JUSTIFICATION", detail
- )
- if vex_just_type:
- if (
- vex_just_type
- not in oe.spdx30.security_VexJustificationType.NAMED_INDIVIDUALS
- ):
- bb.fatal(
- f"Unknown vex justification '{vex_just_type}', detail '{detail}', for ignored {cve}"
- )
-
- for v in spdx_vex:
- v.security_justificationType = oe.spdx30.security_VexJustificationType.NAMED_INDIVIDUALS[
- vex_just_type
- ]
-
- elif status == "Unknown":
- bb.note(f"Skipping {cve} with status 'Unknown'")
- else:
- bb.fatal(f"Unknown {cve} status '{status}'")
-
- if all_cves:
- pkg_objset.new_relationship(
- [spdx_package],
- oe.spdx30.RelationshipType.hasAssociatedVulnerability,
- sorted(list(all_cves)),
- )
-
bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
package_files, excluded_files = add_package_files(
d,
@@ -982,20 +1113,17 @@ def create_spdx(d):
bb.note(f"Added PACKAGECONFIG entries: {len(enabled)} enabled, {len(disabled)} disabled")
- oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir)
+ oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "builds", deploydir)
def create_package_spdx(d):
deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
- is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
- "cross", d
- )
-
- providers = oe.spdx_common.collect_package_providers(d)
+ direct_deps = oe.spdx_common.collect_direct_deps(d, "do_create_spdx")
+ providers = oe.spdx_common.collect_package_providers(d, direct_deps)
pkg_arch = d.getVar("SSTATE_PKGARCH")
- if is_native:
+ if get_is_native(d):
return
bb.build.exec_func("read_subpackage_metadata", d)
Add version extraction, PURL generation, and external references to source download packages in SPDX 3.0 SBOMs: - Extract version from SRCREV for Git sources (full SHA-1) - Generate PURLs for Git sources on github.com by default - Support custom mappings via SPDX_GIT_PURL_MAPPINGS variable (format: "domain:purl_type", split(':', 1) for parsing) - Use ecosystem PURLs from SPDX_PACKAGE_URLS for non-Git - Add VCS external references for Git downloads - Add distribution external references for tarball downloads - Parse Git URLs using urllib.parse - Extract logic into _generate_git_purl() and _enrich_source_package() helpers For non-Git sources, version is not set from PV since the recipe version does not necessarily reflect the version of individual downloaded files. Ecosystem PURLs (which include version) from SPDX_PACKAGE_URLS are still used when available. The SPDX_GIT_PURL_MAPPINGS variable allows configuring PURL generation for self-hosted Git services (e.g., GitLab). github.com is always mapped to pkg:github by default. Signed-off-by: Stefano Tondo <stefano.tondo.ext@siemens.com> --- meta/classes/create-spdx-3.0.bbclass | 7 + meta/lib/oe/spdx30_tasks.py | 444 +++++++++++++++++---------- 2 files changed, 293 insertions(+), 158 deletions(-)