diff mbox series

[RFC,1/1] spdx: Add software file externalRef support

Message ID 20251110171337.754568-2-fbberton@gmail.com
State New
Headers show
Series spdx: Add software file externalRef support | expand

Commit Message

Fabio Berton Nov. 10, 2025, 5:13 p.m. UTC
From: Fabio Berton <fabio.berton@criticaltechworks.com>

Add support for including external references in SPDX 3.0 file elements,
allowing files fetched via file:// protocol to reference their source
location via absolute path or git URL with commit hash.

This change adds the following variables:

- SPDX_FILE_LOCATION variable with three options:
  * 'none' (default) - disables external references
  * 'path' - adds absolute file path as external reference
  * 'git' - generates git URL with commit hash in format
    git+https://host/repo@commit#path/to/file

- SPDX_FILE_LOCATION_REF_TYPE controls the external reference type
  (defaults to 'sourceArtifact')

- SPDX_FILE_LOCATION_GIT_REMOTE to specify which git remote to use
  when constructing git URLs (defaults to 'origin')

The git URL generation requires files to be in a git repository with a
configured remote.

Signed-off-by: Fabio Berton <fabio.berton@criticaltechworks.com>
---
 meta/classes/create-spdx-3.0.bbclass | 24 +++++++
 meta/lib/oe/sbom30.py                | 14 ++++-
 meta/lib/oe/spdx30_tasks.py          | 93 ++++++++++++++++++++++++++++
 3 files changed, 130 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/meta/classes/create-spdx-3.0.bbclass b/meta/classes/create-spdx-3.0.bbclass
index a6d2d44e34..efdd1feb59 100644
--- a/meta/classes/create-spdx-3.0.bbclass
+++ b/meta/classes/create-spdx-3.0.bbclass
@@ -122,6 +122,30 @@  SPDX_PACKAGE_URL[doc] = "Provides a place for the SPDX data creator to record \
 the package URL string (in accordance with the Package URL specification) for \
 a software Package."
 
+SPDX_FILE_LOCATION ?= "none"
+SPDX_FILE_LOCATION[type] = "choice"
+SPDX_FILE_LOCATION[choices] = "none path git"
+# TODO: Type validation only works when inheriting typecheck bbclass
+
+# Available options are listed here:
+# https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Vocabularies/ExternalRefType
+SPDX_FILE_LOCATION_REF_TYPE ?= "sourceArtifact"
+
+# Use 'origin' remote as default, this option is only valid when
+# SPDX_FILE_LOCATION is set to 'git'
+SPDX_FILE_LOCATION_GIT_REMOTE ?= "origin"
+
+SPDX_FILE_LOCATION[doc] = "Controls whether and how to add external references \
+    to source files in the SPDX SBOM. Valid options are: \
+    'none' (default) - disables external references \
+    'path' - adds the absolute file path as an external reference \
+    'git' - generates Git URLs with commit hash in the format \
+    'git+https://host/repo@commit#path/to/file' using the configured remote \
+    (see SPDX_FILE_LOCATION_GIT_REMOTE). When using 'git', files must be in a \
+    git repository with a configured remote. The external reference type is \
+    controlled by SPDX_FILE_LOCATION_REF_TYPE (default 'sourceArtifact'). \
+    NOTE: Using 'path' may result in non-reproducible SPDX output."
+
 IMAGE_CLASSES:append = " create-spdx-image-3.0"
 SDK_CLASSES += "create-spdx-sdk-3.0"
 
diff --git a/meta/lib/oe/sbom30.py b/meta/lib/oe/sbom30.py
index 227ac51877..276661b81b 100644
--- a/meta/lib/oe/sbom30.py
+++ b/meta/lib/oe/sbom30.py
@@ -4,6 +4,7 @@ 
 # SPDX-License-Identifier: GPL-2.0-only
 #
 
+from collections import namedtuple
 from pathlib import Path
 
 import oe.spdx30
@@ -15,6 +16,9 @@  import os
 import oe.spdx_common
 from datetime import datetime, timezone
 
+# Named tuple for external reference information
+ExternalRef = namedtuple("ExternalRef", ["ref_type", "locator"])
+
 OE_SPDX_BASE = "https://rdf.openembedded.org/spdx/3.0/"
 
 VEX_VERSION = "1.0.0"
@@ -600,7 +604,7 @@  class ObjectSet(oe.spdx30.SHACLObjectSet):
         )
         spdx_file.extension.append(OELicenseScannedExtension())
 
-    def new_file(self, _id, name, path, *, purposes=[]):
+    def new_file(self, _id, name, path, *, purposes=[], external_ref=None):
         sha256_hash = bb.utils.sha256_file(path)
 
         for f in self.by_sha256_hash.get(sha256_hash, []):
@@ -641,6 +645,14 @@  class ObjectSet(oe.spdx30.SHACLObjectSet):
             spdx_file.software_primaryPurpose = purposes[0]
             spdx_file.software_additionalPurpose = purposes[1:]
 
+        if external_ref:
+            spdx_file.externalRef.append(
+                oe.spdx30.ExternalRef(
+                    externalRefType=external_ref.ref_type,
+                    locator=[external_ref.locator],
+                )
+            )
+
         spdx_file.verifiedUsing.append(
             oe.spdx30.Hash(
                 algorithm=oe.spdx30.HashAlgorithm.sha256,
diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
index f2f133005d..022308d235 100644
--- a/meta/lib/oe/spdx30_tasks.py
+++ b/meta/lib/oe/spdx30_tasks.py
@@ -357,6 +357,93 @@  def collect_dep_sources(dep_objsets, dest):
             index_sources_by_hash(e.to, dest)
 
 
+def get_git_source_url(filepath, d):
+    """
+    Constructs a git URL with commit hash for a file.
+    Returns URL in format: git+https://host/repo@commit#path/to/file
+    Uses SPDX_FILE_LOCATION_GIT_REMOTE to determine which remote to use.
+    """
+    import oe.buildcfg
+
+    filepath = os.path.abspath(filepath)
+    file_dir = os.path.dirname(filepath)
+
+    repo_root = oe.buildcfg.get_metadata_git_toplevel(file_dir)
+    if not repo_root:
+        bb.fatal(
+            f"SPDX_FILE_LOCATION is set to 'git' but file {filepath} is not in a git repository. "
+            f"Please ensure source files are in a git repository or use SPDX_FILE_LOCATION='path'."
+        )
+
+    commit_hash = oe.buildcfg.get_metadata_git_revision(repo_root)
+    if commit_hash == "<unknown>":
+        bb.fatal(
+            f"Could not determine git revision for {filepath} in repository {repo_root}."
+        )
+
+    remote = d.getVar("SPDX_FILE_LOCATION_GIT_REMOTE")
+    if not remote:
+        bb.fatal(
+            "SPDX_FILE_LOCATION_GIT_REMOTE is not set. "
+            "Please set it to a valid git remote name (e.g., 'origin', 'upstream')."
+        )
+
+    remote_url = oe.buildcfg.get_metadata_git_remote_url(repo_root, remote)
+    if not remote_url:
+        bb.fatal(
+            f"Could not determine git URL for {filepath}. No remote '{remote}' found in git repository {repo_root}. "
+            f"Please ensure the file is in a git repository with a configured remote."
+        )
+
+    rel_path = os.path.relpath(filepath, repo_root)
+
+    # Construct git URL: git+https://host/repo@commit#path/to/file
+    git_url = f"git+{remote_url}@{commit_hash}#{rel_path}"
+
+    return git_url
+
+
+def get_file_external_ref(file_path, d):
+    """
+    Creates a File External Reference based on SPDX_FILE_LOCATION setting.
+    Options are 'path' (absolute file path) or 'git' (git URL with commit hash).
+    Reference type is controlled by SPDX_FILE_LOCATION_REF_TYPE.
+    """
+    spdx_file_location = d.getVar("SPDX_FILE_LOCATION")
+
+    # If 'none', external references are disabled
+    if spdx_file_location == "none":
+        return None
+
+    # TODO: Clarify this point:
+    # This is already checked by typecheck.bbclass. Do we need to support the
+    # use case without inheriting that class?
+    valid_choices = (d.getVarFlag("SPDX_FILE_LOCATION", "choices") or "").split()
+    if spdx_file_location not in valid_choices:
+        bb.fatal(
+            f"Invalid SPDX_FILE_LOCATION='{spdx_file_location}'. "
+            f"Must be one of: {', '.join(valid_choices)}"
+        )
+
+    ref_type = d.getVar("SPDX_FILE_LOCATION_REF_TYPE")
+    if not hasattr(oe.spdx30.ExternalRefType, ref_type):
+        bb.fatal(
+            f"Invalid SPDX_FILE_LOCATION_REF_TYPE = '{ref_type}'. "
+            f"Must be a valid ExternalRefType from SPDX 3.0 spec."
+        )
+
+    ref_type_obj = getattr(oe.spdx30.ExternalRefType, ref_type)
+
+    if spdx_file_location == "path":
+        return oe.sbom30.ExternalRef(ref_type=ref_type_obj, locator=f"{file_path}")
+    elif spdx_file_location == "git":
+        return oe.sbom30.ExternalRef(
+            ref_type=ref_type_obj, locator=get_git_source_url(file_path, d)
+        )
+
+    return None
+
+
 def add_download_files(d, objset):
     inputs = set()
 
@@ -384,6 +471,8 @@  def add_download_files(d, objset):
                             # TODO: SPDX doesn't support symlinks yet
                             continue
 
+                        external_ref = get_file_external_ref(f_path, d)
+
                         file = objset.new_file(
                             objset.new_spdxid(
                                 "source", str(download_idx + 1), str(walk_idx)
@@ -393,17 +482,21 @@  def add_download_files(d, objset):
                             ),
                             f_path,
                             purposes=[primary_purpose],
+                            external_ref=external_ref,
                         )
 
                         inputs.add(file)
                         walk_idx += 1
 
             else:
+                external_ref = get_file_external_ref(fd.localpath, d)
+
                 file = objset.new_file(
                     objset.new_spdxid("source", str(download_idx + 1)),
                     file_name,
                     fd.localpath,
                     purposes=[primary_purpose],
+                    external_ref=external_ref,
                 )
                 inputs.add(file)