diff mbox series

[v8,1/7] spdx30: Add configurable file exclusion pattern support

Message ID 20260309132854.128375-2-stondo@gmail.com
State Under Review
Headers show
Series SPDX 3.0 SBOM enrichment and compliance improvements | expand

Commit Message

Stefano Tondo March 9, 2026, 1:28 p.m. UTC
From: Stefano Tondo <stefano.tondo.ext@siemens.com>

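Add SPDX_FILE_EXCLUDE_PATTERNS variable that allows filtering files from
SPDX output by pattern matching. The variable accepts a space-separated
list of patterns; each pattern is a plain substring (not a glob or regex)
and a file is excluded when its lowercased path, relative to the scanned
top directory, contains any pattern. Patterns are compared as written, so
they should be given in lower case.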

When empty (the default), no filtering is applied and all files are
included, preserving existing behavior.

This enables users to reduce SBOM size by excluding files that are not
relevant for compliance (e.g., test files, object files, patches).

When file exclusion is active, a debug source lookup that finds no
matching package file is logged at debug level and skipped instead of
causing a fatal error, because the file may have been filtered out.

Signed-off-by: Stefano Tondo <stefano.tondo.ext@siemens.com>
---
 meta/classes/spdx-common.bbclass |  6 ++++++
 meta/lib/oe/spdx30_tasks.py      | 28 ++++++++++++++++++++++++----
 2 files changed, 30 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/meta/classes/spdx-common.bbclass b/meta/classes/spdx-common.bbclass
index 3110230c9e..f54459d3b4 100644
--- a/meta/classes/spdx-common.bbclass
+++ b/meta/classes/spdx-common.bbclass
@@ -54,6 +54,12 @@  SPDX_CONCLUDED_LICENSE[doc] = "The license concluded by manual or external \
 
 SPDX_MULTILIB_SSTATE_ARCHS ??= "${SSTATE_ARCHS}"
 
+SPDX_FILE_EXCLUDE_PATTERNS ??= ""
+SPDX_FILE_EXCLUDE_PATTERNS[doc] = "Space-separated list of patterns to exclude \
+    from SPDX file output. Files whose paths contain any of these patterns will \
+    be filtered out. Defaults to empty (no filtering). Example: \
+    SPDX_FILE_EXCLUDE_PATTERNS = '.patch .diff /test/ .pyc .o'"
+
 python () {
     from oe.cve_check import extend_cve_status
     extend_cve_status(d)
diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
index 99f2892dfb..5ced792d71 100644
--- a/meta/lib/oe/spdx30_tasks.py
+++ b/meta/lib/oe/spdx30_tasks.py
@@ -161,6 +161,9 @@  def add_package_files(
         compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
         bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
 
+    # File exclusion filtering
+    exclude_patterns = (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split()
+
     for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
         dirs[:] = [d for d in dirs if d not in ignore_dirs]
         if subdir == str(topdir):
@@ -174,6 +177,13 @@  def add_package_files(
                 continue
 
             filename = str(filepath.relative_to(topdir))
+
+            # Apply file exclusion filtering
+            if exclude_patterns:
+                filename_lower = filename.lower()
+                if any(pattern in filename_lower for pattern in exclude_patterns):
+                    continue
+
             file_purposes = get_purposes(filepath)
 
             # Check if file is compiled
@@ -219,6 +229,8 @@  def add_package_files(
 def get_package_sources_from_debug(
     d, package, package_files, sources, source_hash_cache
 ):
+    exclude_patterns = (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split()
+
     def file_path_match(file_path, pkg_file):
         if file_path.lstrip("/") == pkg_file.name.lstrip("/"):
             return True
@@ -251,10 +263,18 @@  def get_package_sources_from_debug(
             continue
 
         if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
-            bb.fatal(
-                "No package file found for %s in %s; SPDX found: %s"
-                % (str(file_path), package, " ".join(p.name for p in package_files))
-            )
+            # When file exclusion patterns are active, some files may be filtered out
+            if exclude_patterns:
+                bb.debug(
+                    1,
+                    f"Skipping debug source lookup for {file_path} in {package} (file exclusion active)",
+                )
+                continue
+            else:
+                bb.fatal(
+                    "No package file found for %s in %s; SPDX found: %s"
+                    % (str(file_path), package, " ".join(p.name for p in package_files))
+                )
             continue
 
         for debugsrc in file_data["debugsrc"]: