@@ -82,6 +82,13 @@ SPDX_MULTILIB_SSTATE_ARCHS[doc] = "The list of sstate architectures to consider
when collecting SPDX dependencies. This includes multilib architectures when \
multilib is enabled. Defaults to SSTATE_ARCHS."
+SPDX_FILE_EXCLUDE_PATTERNS ??= ""
+SPDX_FILE_EXCLUDE_PATTERNS[doc] = "Space-separated list of Python regular \
+ expressions to exclude files from SPDX output. Files whose paths match \
+ any pattern (via re.search) will be filtered out. Defaults to empty \
+ (no filtering). Example: \
+ SPDX_FILE_EXCLUDE_PATTERNS = '\\.patch$ \\.diff$ /test/ \\.pyc$ \\.o$'"
+
python () {
from oe.cve_check import extend_cve_status
extend_cve_status(d)
@@ -13,6 +13,7 @@ import oe.spdx30
import oe.spdx_common
import oe.sdk
import os
+import re
from contextlib import contextmanager
from datetime import datetime, timezone
@@ -157,17 +158,27 @@ def add_package_files(
file_counter = 1
if not os.path.exists(topdir):
bb.note(f"Skip {topdir}")
- return spdx_files
+ return spdx_files, set()
check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
if check_compiled_sources:
compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
+ exclude_patterns = [
+ re.compile(pattern)
+ for pattern in (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split()
+ ]
+ excluded_files = set()
+
for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
- dirs[:] = [d for d in dirs if d not in ignore_dirs]
+ dirs[:] = [directory for directory in dirs if directory not in ignore_dirs]
if subdir == str(topdir):
- dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs]
+ dirs[:] = [
+ directory
+ for directory in dirs
+ if directory not in ignore_top_level_dirs
+ ]
dirs.sort()
files.sort()
@@ -177,14 +188,19 @@ def add_package_files(
continue
filename = str(filepath.relative_to(topdir))
+
+ if exclude_patterns and any(
+ pattern.search(filename) for pattern in exclude_patterns
+ ):
+ excluded_files.add(filename)
+ continue
+
file_purposes = get_purposes(filepath)
- # Check if file is compiled
- if check_compiled_sources:
- if not oe.spdx_common.is_compiled_source(
- filename, compiled_sources, types
- ):
- continue
+ if check_compiled_sources and not oe.spdx_common.is_compiled_source(
+ filename, compiled_sources, types
+ ):
+ continue
spdx_file = objset.new_file(
get_spdxid(file_counter),
@@ -218,12 +234,15 @@ def add_package_files(
bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id))
- return spdx_files
+ return spdx_files, excluded_files
def get_package_sources_from_debug(
- d, package, package_files, sources, source_hash_cache
+ d, package, package_files, sources, source_hash_cache, excluded_files=None
):
+ if excluded_files is None:
+ excluded_files = set()
+
def file_path_match(file_path, pkg_file):
if file_path.lstrip("/") == pkg_file.name.lstrip("/"):
return True
@@ -256,6 +275,12 @@ def get_package_sources_from_debug(
continue
if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
+ if file_path.lstrip("/") in excluded_files:
+ bb.debug(
+ 1,
+ f"Skipping debug source lookup for excluded file {file_path} in {package}",
+ )
+ continue
bb.fatal(
"No package file found for %s in %s; SPDX found: %s"
% (str(file_path), package, " ".join(p.name for p in package_files))
@@ -737,7 +762,7 @@ def create_spdx(d):
bb.debug(1, "Adding source files to SPDX")
oe.spdx_common.get_patched_src(d)
- files = add_package_files(
+ files, _ = add_package_files(
d,
build_objset,
spdx_workdir,
@@ -909,7 +934,7 @@ def create_spdx(d):
)
bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
- package_files = add_package_files(
+ package_files, excluded_files = add_package_files(
d,
pkg_objset,
pkgdest / package,
@@ -932,7 +957,8 @@ def create_spdx(d):
if include_sources:
debug_sources = get_package_sources_from_debug(
- d, package, package_files, dep_sources, source_hash_cache
+ d, package, package_files, dep_sources, source_hash_cache,
+ excluded_files=excluded_files,
)
debug_source_ids |= set(
oe.sbom30.get_element_link_id(d) for d in debug_sources
@@ -944,7 +970,7 @@ def create_spdx(d):
if include_sources:
bb.debug(1, "Adding sysroot files to SPDX")
- sysroot_files = add_package_files(
+ sysroot_files, _ = add_package_files(
d,
build_objset,
d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"),
@@ -1326,18 +1352,18 @@ def create_image_spdx(d):
image_filename = image["filename"]
image_path = image_deploy_dir / image_filename
if os.path.isdir(image_path):
- a = add_package_files(
- d,
- objset,
- image_path,
- lambda file_counter: objset.new_spdxid(
- "imagefile", str(file_counter)
- ),
- lambda filepath: [],
- license_data=None,
- ignore_dirs=[],
- ignore_top_level_dirs=[],
- archive=None,
+ a, _ = add_package_files(
+ d,
+ objset,
+ image_path,
+ lambda file_counter: objset.new_spdxid(
+ "imagefile", str(file_counter)
+ ),
+ lambda filepath: [],
+ license_data=None,
+ ignore_dirs=[],
+ ignore_top_level_dirs=[],
+ archive=None,
)
artifacts.extend(a)
else:
Add SPDX_FILE_EXCLUDE_PATTERNS variable that allows filtering files from SPDX output by regex matching. The variable accepts a space-separated list of Python regular expressions; files whose paths match any pattern (via re.search) are excluded. When empty (the default), no filtering is applied and all files are included, preserving existing behavior. This enables users to reduce SBOM size by excluding files that are not relevant for compliance (e.g., test files, object files, patches). Excluded files are tracked in a set returned from add_package_files() and passed to get_package_sources_from_debug(), which uses the set for precise cross-checking rather than re-evaluating patterns. Signed-off-by: Stefano Tondo <stefano.tondo.ext@siemens.com> --- meta/classes/spdx-common.bbclass | 7 +++ meta/lib/oe/spdx30_tasks.py | 80 +++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 27 deletions(-)