diff mbox series

[RFC,1/2] spdx30: Add OpenVEX standalone document generation

Message ID 20260331141956.608976-2-stondo@gmail.com
State New
Headers show
Series spdx30: Add OpenVEX standalone document generation | expand

Commit Message

Stefano Tondo March 31, 2026, 2:19 p.m. UTC
From: Stefano Tondo <stefano.tondo.ext@siemens.com>

Add OpenVEX document generation integrated into the SPDX 3.0 recipe-level
workflow. When enabled, standalone .vex.json files are generated alongside
SPDX documents for each recipe with CVE data.

Key changes:
- Add generate_openvex_from_spdx() and helper functions to spdx30_tasks.py
  that create OpenVEX documents from SPDX VEX assessment relationships
- Map SPDX VEX status to OpenVEX status (Patched->fixed, Unpatched->affected,
  Ignored->not_affected, Unknown->under_investigation)
- Extract product PURLs from SPDX packages with proper fallback chain
- Copy the VEX file into the create_package_spdx() sstate output so that it
  survives an sstate restore of that task
- Add OPENVEX_GENERATE_STANDALONE, OPENVEX_AUTHOR, OPENVEX_ROLE variables
  to create-spdx-3.0.bbclass (default disabled)
- Add DEPLOY_DIR_SPDX to SSTATE_ALLOW_OVERLAP_FILES so that both tasks may
  deploy the same VEX file
- Document OpenVEX variables in spdx-common.bbclass

This implementation is designed to work with Joshua Watt's recipe-level
SPDX architecture where VEX data is created in create_recipe_spdx() with
the recipe_objset and 4-tuple cve_by_status format.

Signed-off-by: Stefano Tondo <stefano.tondo.ext@siemens.com>
---
 meta/classes/create-spdx-3.0.bbclass |  19 +++
 meta/classes/spdx-common.bbclass     |  15 +++
 meta/lib/oe/spdx30_tasks.py          | 193 +++++++++++++++++++++++++++
 3 files changed, 227 insertions(+)
diff mbox series

Patch

diff --git a/meta/classes/create-spdx-3.0.bbclass b/meta/classes/create-spdx-3.0.bbclass
index 432adb14cd..0519f87c41 100644
--- a/meta/classes/create-spdx-3.0.bbclass
+++ b/meta/classes/create-spdx-3.0.bbclass
@@ -45,6 +45,17 @@  SPDX_INCLUDE_VEX[doc] = "Controls what VEX information is in the output. Set to
     including those already fixed upstream (warning: This can be large and \
     slow)."
 
+OPENVEX_GENERATE_STANDALONE ??= "0"
+OPENVEX_GENERATE_STANDALONE[doc] = "Controls whether standalone OpenVEX .vex.json \
+    files are generated alongside SPDX documents. Set to '1' to enable. VEX data \
+    remains embedded in SPDX when SPDX_INCLUDE_VEX is not 'none' regardless."
+
+OPENVEX_AUTHOR ??= "Yocto Build System"
+OPENVEX_AUTHOR[doc] = "Author name for generated OpenVEX documents."
+
+OPENVEX_ROLE ??= "Build System"
+OPENVEX_ROLE[doc] = "Author role for generated OpenVEX documents."
+
 SPDX_INCLUDE_TIMESTAMPS ?= "0"
 SPDX_INCLUDE_TIMESTAMPS[doc] = "Include time stamps in SPDX output. This is \
     useful if you want to know when artifacts were produced and when builds \
@@ -186,6 +197,9 @@  SPDX3_VAR_DEPS = "\
     SPDX_PROFILES \
     SPDX_NAMESPACE_PREFIX \
     SPDX_UUID_NAMESPACE \
+    OPENVEX_GENERATE_STANDALONE \
+    OPENVEX_AUTHOR \
+    OPENVEX_ROLE \
     "
 
 python do_create_recipe_spdx() {
@@ -223,6 +237,11 @@  SSTATETASKS += "do_create_spdx"
 do_create_spdx[sstate-inputdirs] = "${SPDXDEPLOY}"
 do_create_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"
 do_create_spdx[file-checksums] += "${SPDX3_DEP_FILES}"
+
+# Allow VEX files to overlap between the create_recipe_spdx and
+# create_package_spdx sstate outputs (VEX is generated by the former and copied
+# into the latter). NOTE: this allows overlap for all of DEPLOY_DIR_SPDX.
+SSTATE_ALLOW_OVERLAP_FILES += "${DEPLOY_DIR_SPDX}"
 do_create_spdx[deptask] += "do_create_spdx"
 do_create_spdx[dirs] = "${SPDXWORK}"
 do_create_spdx[cleandirs] = "${SPDXDEPLOY} ${SPDXWORK}"
diff --git a/meta/classes/spdx-common.bbclass b/meta/classes/spdx-common.bbclass
index 40701730a6..c2960f04d2 100644
--- a/meta/classes/spdx-common.bbclass
+++ b/meta/classes/spdx-common.bbclass
@@ -82,6 +82,21 @@  SPDX_MULTILIB_SSTATE_ARCHS[doc] = "The list of sstate architectures to consider
     when collecting SPDX dependencies. This includes multilib architectures when \
     multilib is enabled. Defaults to SSTATE_ARCHS."
 
+OPENVEX_GENERATE_STANDALONE[doc] = "Controls whether standalone OpenVEX .vex.json \
+    files are generated in addition to VEX data embedded in SPDX documents. Set to \
+    '1' to enable standalone file generation. VEX data remains embedded in SPDX when \
+    SPDX_INCLUDE_VEX is not 'none' regardless of this setting. \
+    Default: '0' (disabled). Defined in create-spdx-3.0.bbclass."
+
+OPENVEX_AUTHOR[doc] = "Author name for generated OpenVEX documents. Identifies \
+    the person or organization that created the VEX document. \
+    Default: 'Yocto Build System'. Defined in create-spdx-3.0.bbclass."
+
+OPENVEX_ROLE[doc] = "Author role for generated OpenVEX documents. Describes the \
+    capacity in which the author is creating the VEX document (e.g., 'Build System', \
+    'Security Team', 'Maintainer'). Default: 'Build System'. \
+    Defined in create-spdx-3.0.bbclass."
+
 SPDX_FILE_EXCLUDE_PATTERNS ??= ""
 SPDX_FILE_EXCLUDE_PATTERNS[doc] = "Space-separated list of Python regular \
     expressions to exclude files from SPDX output. Files whose paths match \
diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
index cd9672c18e..ba9bef3105 100644
--- a/meta/lib/oe/spdx30_tasks.py
+++ b/meta/lib/oe/spdx30_tasks.py
@@ -790,9 +790,188 @@  def create_recipe_spdx(d):
             sorted(list(all_cves)),
         )
 
+    # Generate standalone OpenVEX document from recipe VEX data
+    generate_openvex_from_spdx(d, recipe_objset, deploydir, cve_by_status)
+
     oe.sbom30.write_recipe_jsonld_doc(d, recipe_objset, "static", deploydir)
 
 
+def generate_openvex_from_spdx(d, objset, deploydir, cve_by_status=None):
+    """
+    Generate a standalone OpenVEX document for the current recipe.
+
+    Does nothing unless OPENVEX_GENERATE_STANDALONE is "1" and
+    SPDX_INCLUDE_VEX is not "none". Statements are built from cve_by_status
+    when it is given, otherwise from the security_Vulnerability objects in
+    objset. The result is written to
+    <deploydir>/<SSTATE_PKGARCH>/recipes/<PN>.vex.json; an OSError while
+    writing the file is reported with bb.warn() instead of being raised.
+    """
+    import json
+    import hashlib
+    from datetime import datetime, timezone
+
+    if d.getVar("OPENVEX_GENERATE_STANDALONE") != "1":
+        return
+    if d.getVar("SPDX_INCLUDE_VEX") == "none":
+        return
+
+    statements = []
+    if cve_by_status:
+        # Preferred path: values are (spdx_cve, detail, description, resources)
+        for status_key, cves in cve_by_status.items():
+            for cve_id, (_, detail, description, _) in cves.items():
+                statements.append(
+                    _make_vex_statement(d, objset, cve_id, status_key, detail, description)
+                )
+    else:
+        # Fallback: extract from VEX assessment relationships in objset
+        for obj in objset.foreach_type(oe.spdx30.security_Vulnerability):
+            cve_id = _get_cve_id(obj)
+            status, detail, description = _get_vex_status_from_relationships(objset, obj)
+            statements.append(
+                _make_vex_statement(d, objset, cve_id, status, detail, description)
+            )
+
+    if not statements:
+        bb.debug(1, "No vulnerabilities found in %s, skipping OpenVEX" % d.getVar("PN"))
+        return
+
+    # The @id is derived from the statements alone, so unchanged statements
+    # keep the same @id.
+    statements_json = json.dumps(statements, sort_keys=True)
+    doc_id = hashlib.sha256(statements_json.encode()).hexdigest()[:16]
+
+    # Prefer SOURCE_DATE_EPOCH so the output does not depend on the wall clock;
+    # fall back to the current time when it is unset or not a valid timestamp.
+    try:
+        timestamp = datetime.fromtimestamp(int(d.getVar("SOURCE_DATE_EPOCH")), timezone.utc)
+    except (TypeError, ValueError, OverflowError, OSError):
+        timestamp = datetime.now(timezone.utc)
+
+    openvex_doc = {
+        "@context": "https://openvex.dev/ns/v0.2.0",
+        "@id": "https://openvex.dev/docs/yocto/vex-%s" % doc_id,
+        "author": d.getVar("OPENVEX_AUTHOR") or "Yocto Build System",
+        "role": d.getVar("OPENVEX_ROLE") or "Build System",
+        "timestamp": timestamp.isoformat().replace("+00:00", "Z"),
+        "version": 1,
+        "statements": statements,
+    }
+
+    # Write VEX to sstate staging area (deploydir) so it is included in
+    # the do_create_recipe_spdx sstate output and survives sstate restore.
+    pkg_arch = d.getVar("SSTATE_PKGARCH")
+    pkg_name = d.getVar("PN")
+    openvex_file = deploydir / pkg_arch / "recipes" / ("%s.vex.json" % pkg_name)
+    openvex_file.parent.mkdir(parents=True, exist_ok=True)
+
+    try:
+        with open(openvex_file, "w", encoding="utf-8") as f:
+            json.dump(openvex_doc, f, indent=2)
+    except OSError as e:
+        bb.warn("Failed to write OpenVEX file %s: %s" % (openvex_file, e))
+    else:
+        bb.debug(1, "Created OpenVEX document: %s (%d statements)" % (
+            openvex_file, len(statements)))
+
+
+def _get_cve_id(vuln_obj):
+    """Return the first "CVE-"-prefixed external identifier, else "Unknown"."""
+    identifiers = (ext.identifier for ext in vuln_obj.externalIdentifier)
+    return next(
+        (ident for ident in identifiers if ident and ident.startswith("CVE-")), "Unknown"
+    )
+
+
+def _get_vex_status_from_relationships(objset, vuln_obj):
+    """Derive a (status_key, detail, description) tuple from VEX relationships (fallback path)."""
+    target = oe.sbom30.get_element_link_id(vuln_obj)
+    # NOTE(review): from_ may be a single element rather than a list - confirm "in" is intended.
+    for vex_rel in objset.foreach_type(oe.spdx30.security_VexVulnAssessmentRelationship):
+        if not (target in vex_rel.to or target in vex_rel.from_):
+            continue
+        kind = vex_rel.relationshipType
+        if kind == oe.spdx30.RelationshipType.fixedIn:
+            return "Patched", None, None
+        if kind == oe.spdx30.RelationshipType.affects:
+            return "Unpatched", None, None
+        if kind == oe.spdx30.RelationshipType.doesNotAffect:
+            return "Ignored", None, getattr(vex_rel, "security_impactStatement", None)
+    return "Unknown", None, None
+
+
+def _make_vex_statement(d, objset, cve_id, status_key, detail, description):
+    """Create an OpenVEX statement dict for cve_id from its CVE status information.
+
+    Unmapped status_key values are reported as "affected".
+    """
+    status_map = {
+        "Patched": "fixed",
+        "Unpatched": "affected",
+        "Ignored": "not_affected",
+        "Unknown": "under_investigation",
+    }
+    status = status_map.get(status_key, "affected")
+
+    statement = {
+        "vulnerability": {"name": cve_id},
+        "products": _extract_products(d, objset),
+        "status": status,
+    }
+
+    if status == "fixed" and detail:
+        statement["status_notes"] = "Patched: %s" % detail
+    elif status == "affected":
+        # OpenVEX requires an action_statement on every "affected" statement,
+        # so emit it even when there is no detail for status_notes.
+        if detail:
+            statement["status_notes"] = "Unpatched: %s" % detail
+        statement["action_statement"] = (
+            "This vulnerability is not yet patched. Consider updating "
+            "to a newer version or applying a backport patch."
+        )
+    elif status == "not_affected":
+        statement["justification"] = "vulnerable_code_not_in_execute_path"
+        if description:
+            statement["impact_statement"] = description
+    elif status == "under_investigation":
+        statement["status_notes"] = "CVE status is unknown or under investigation"
+    return statement
+
+
+def _extract_products(d, objset):
+    """Return OpenVEX product entries ({"@id": ...}) for the packages in objset."""
+    found = []
+
+    for package in objset.foreach_type(oe.spdx30.software_Package):
+        # The package's own PURL property wins when it is set.
+        if getattr(package, "software_packageUrl", None):
+            found.append({"@id": package.software_packageUrl})
+            continue
+
+        # Otherwise take the first external identifier of type packageUrl.
+        purl_ids = [
+            ext.identifier
+            for ext in package.externalIdentifier
+            if ext.externalIdentifierType == oe.spdx30.ExternalIdentifierType.packageUrl
+        ]
+        if purl_ids:
+            found.append({"@id": purl_ids[0]})
+
+    if found:
+        return found
+
+    # Nothing in objset carried a PURL: fall back to the recipe's base PURL,
+    # then to the document's link ID, then to a fixed placeholder.
+    recipe_purl = oe.purl.get_base_purl(d)
+    if recipe_purl:
+        return [{"@id": "%s?type=source" % recipe_purl}]
+
+    doc_id = oe.sbom30.get_element_link_id(objset.doc)
+    return [{"@id": doc_id or "urn:spdx:unknown"}]
+
+
 def load_recipe_spdx(d):
 
     return oe.sbom30.find_root_obj_in_jsonld(
@@ -1133,6 +1312,20 @@  def create_package_spdx(d):
     providers = oe.spdx_common.collect_package_providers(d, direct_deps)
     pkg_arch = d.getVar("SSTATE_PKGARCH")
 
+    # Copy VEX file from create_recipe_spdx deploy output to
+    # create_package_spdx sstate input as a secondary capture path.
+    # The primary path is via create_recipe_spdx sstate, but this
+    # ensures VEX files are also available if create_package_spdx
+    # sstate is restored independently.
+    import shutil
+    pn = d.getVar("PN")
+    vex_src = deploy_dir_spdx / pkg_arch / "recipes" / ("%s.vex.json" % pn)
+    if vex_src.exists():
+        vex_dest = deploydir / pkg_arch / "recipes" / ("%s.vex.json" % pn)
+        vex_dest.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(str(vex_src), str(vex_dest))
+        bb.debug(1, "Copied VEX file to sstate: %s" % vex_dest)
+
     if get_is_native(d):
         return