| Message ID | 20260324132958.2316491-2-stondo@gmail.com |
|---|---|
| State | Superseded |
| Headers | show |
| Series | SPDX 3.0 SBOM enrichment and compliance improvements | expand |
On Tue, Mar 24, 2026 at 7:30 AM <stondo@gmail.com> wrote: > > From: Stefano Tondo <stefano.tondo.ext@siemens.com> > > Add SPDX_FILE_EXCLUDE_PATTERNS variable that allows filtering files from > SPDX output by regex matching. The variable accepts a space-separated > list of Python regular expressions; files whose paths match any pattern > (via re.search) are excluded. > > When empty (the default), no filtering is applied and all files are > included, preserving existing behavior. > > This enables users to reduce SBOM size by excluding files that are not > relevant for compliance (e.g., test files, object files, patches). > > Excluded files are tracked in a set returned from add_package_files() > and passed to get_package_sources_from_debug(), which uses the set for > precise cross-checking rather than re-evaluating patterns. LGTM, Thanks. Reviewed-by: Joshua Watt <JPEWhacker@gmail.com> > > Signed-off-by: Stefano Tondo <stefano.tondo.ext@siemens.com> > --- > meta/classes/spdx-common.bbclass | 7 +++ > meta/lib/oe/spdx30_tasks.py | 80 +++++++++++++++++++++----------- > 2 files changed, 60 insertions(+), 27 deletions(-) > > diff --git a/meta/classes/spdx-common.bbclass b/meta/classes/spdx-common.bbclass > index 83f05579b6..40701730a6 100644 > --- a/meta/classes/spdx-common.bbclass > +++ b/meta/classes/spdx-common.bbclass > @@ -82,6 +82,13 @@ SPDX_MULTILIB_SSTATE_ARCHS[doc] = "The list of sstate architectures to consider > when collecting SPDX dependencies. This includes multilib architectures when \ > multilib is enabled. Defaults to SSTATE_ARCHS." > > +SPDX_FILE_EXCLUDE_PATTERNS ??= "" > +SPDX_FILE_EXCLUDE_PATTERNS[doc] = "Space-separated list of Python regular \ > + expressions to exclude files from SPDX output. Files whose paths match \ > + any pattern (via re.search) will be filtered out. Defaults to empty \ > + (no filtering). Example: \ > + SPDX_FILE_EXCLUDE_PATTERNS = '\\.patch$ \\.diff$ /test/ \\.pyc$ \\.o$'" > + > python () { > from oe.cve_check import extend_cve_status > extend_cve_status(d) > diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py > index 353d783fa2..68ed821a8c 100644 > --- a/meta/lib/oe/spdx30_tasks.py > +++ b/meta/lib/oe/spdx30_tasks.py > @@ -13,6 +13,7 @@ import oe.spdx30 > import oe.spdx_common > import oe.sdk > import os > +import re > > from contextlib import contextmanager > from datetime import datetime, timezone > @@ -157,17 +158,27 @@ def add_package_files( > file_counter = 1 > if not os.path.exists(topdir): > bb.note(f"Skip {topdir}") > - return spdx_files > + return spdx_files, set() > > check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1" > if check_compiled_sources: > compiled_sources, types = oe.spdx_common.get_compiled_sources(d) > bb.debug(1, f"Total compiled files: {len(compiled_sources)}") > > + exclude_patterns = [ > + re.compile(pattern) > + for pattern in (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split() > + ] > + excluded_files = set() > + > for subdir, dirs, files in os.walk(topdir, onerror=walk_error): > - dirs[:] = [d for d in dirs if d not in ignore_dirs] > + dirs[:] = [directory for directory in dirs if directory not in ignore_dirs] > if subdir == str(topdir): > - dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs] > + dirs[:] = [ > + directory > + for directory in dirs > + if directory not in ignore_top_level_dirs > + ] > > dirs.sort() > files.sort() > @@ -177,14 +188,19 @@ def add_package_files( > continue > > filename = str(filepath.relative_to(topdir)) > + > + if exclude_patterns and any( > + pattern.search(filename) for pattern in exclude_patterns > + ): > + excluded_files.add(filename) > + continue > + > file_purposes = get_purposes(filepath) > > - # Check if file is compiled > - if check_compiled_sources: > - if not oe.spdx_common.is_compiled_source( > - filename, compiled_sources, types > - ): > - continue > + if check_compiled_sources and not oe.spdx_common.is_compiled_source( > + filename, compiled_sources, types > + ): > + continue > > spdx_file = objset.new_file( > get_spdxid(file_counter), > @@ -218,12 +234,15 @@ def add_package_files( > > bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id)) > > - return spdx_files > + return spdx_files, excluded_files > > > def get_package_sources_from_debug( > - d, package, package_files, sources, source_hash_cache > + d, package, package_files, sources, source_hash_cache, excluded_files=None > ): > + if excluded_files is None: > + excluded_files = set() > + > def file_path_match(file_path, pkg_file): > if file_path.lstrip("/") == pkg_file.name.lstrip("/"): > return True > @@ -256,6 +275,12 @@ def get_package_sources_from_debug( > continue > > if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files): > + if file_path.lstrip("/") in excluded_files: > + bb.debug( > + 1, > + f"Skipping debug source lookup for excluded file {file_path} in {package}", > + ) > + continue > bb.fatal( > "No package file found for %s in %s; SPDX found: %s" > % (str(file_path), package, " ".join(p.name for p in package_files)) > @@ -737,7 +762,7 @@ def create_spdx(d): > bb.debug(1, "Adding source files to SPDX") > oe.spdx_common.get_patched_src(d) > > - files = add_package_files( > + files, _ = add_package_files( > d, > build_objset, > spdx_workdir, > @@ -909,7 +934,7 @@ def create_spdx(d): > ) > > bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name) > - package_files = add_package_files( > + package_files, excluded_files = add_package_files( > d, > pkg_objset, > pkgdest / package, > @@ -932,7 +957,8 @@ def create_spdx(d): > > if include_sources: > debug_sources = get_package_sources_from_debug( > - d, package, package_files, dep_sources, source_hash_cache > + d, package, package_files, dep_sources, source_hash_cache, > + excluded_files=excluded_files, > ) > debug_source_ids |= set( > oe.sbom30.get_element_link_id(d) for d in debug_sources > @@ -944,7 +970,7 @@ def create_spdx(d): > > if include_sources: > bb.debug(1, "Adding sysroot files to SPDX") > - sysroot_files = add_package_files( > + sysroot_files, _ = add_package_files( > d, > build_objset, > d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"), > @@ -1326,18 +1352,18 @@ def create_image_spdx(d): > image_filename = image["filename"] > image_path = image_deploy_dir / image_filename > if os.path.isdir(image_path): > - a = add_package_files( > - d, > - objset, > - image_path, > - lambda file_counter: objset.new_spdxid( > - "imagefile", str(file_counter) > - ), > - lambda filepath: [], > - license_data=None, > - ignore_dirs=[], > - ignore_top_level_dirs=[], > - archive=None, > + a, _ = add_package_files( > + d, > + objset, > + image_path, > + lambda file_counter: objset.new_spdxid( > + "imagefile", str(file_counter) > + ), > + lambda filepath: [], > + license_data=None, > + ignore_dirs=[], > + ignore_top_level_dirs=[], > + archive=None, > ) > artifacts.extend(a) > else: > -- > 2.53.0 >
diff --git a/meta/classes/spdx-common.bbclass b/meta/classes/spdx-common.bbclass index 83f05579b6..40701730a6 100644 --- a/meta/classes/spdx-common.bbclass +++ b/meta/classes/spdx-common.bbclass @@ -82,6 +82,13 @@ SPDX_MULTILIB_SSTATE_ARCHS[doc] = "The list of sstate architectures to consider when collecting SPDX dependencies. This includes multilib architectures when \ multilib is enabled. Defaults to SSTATE_ARCHS." +SPDX_FILE_EXCLUDE_PATTERNS ??= "" +SPDX_FILE_EXCLUDE_PATTERNS[doc] = "Space-separated list of Python regular \ + expressions to exclude files from SPDX output. Files whose paths match \ + any pattern (via re.search) will be filtered out. Defaults to empty \ + (no filtering). Example: \ + SPDX_FILE_EXCLUDE_PATTERNS = '\\.patch$ \\.diff$ /test/ \\.pyc$ \\.o$'" + python () { from oe.cve_check import extend_cve_status extend_cve_status(d) diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py index 353d783fa2..68ed821a8c 100644 --- a/meta/lib/oe/spdx30_tasks.py +++ b/meta/lib/oe/spdx30_tasks.py @@ -13,6 +13,7 @@ import oe.spdx30 import oe.spdx_common import oe.sdk import os +import re from contextlib import contextmanager from datetime import datetime, timezone @@ -157,17 +158,27 @@ def add_package_files( file_counter = 1 if not os.path.exists(topdir): bb.note(f"Skip {topdir}") - return spdx_files + return spdx_files, set() check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1" if check_compiled_sources: compiled_sources, types = oe.spdx_common.get_compiled_sources(d) bb.debug(1, f"Total compiled files: {len(compiled_sources)}") + exclude_patterns = [ + re.compile(pattern) + for pattern in (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split() + ] + excluded_files = set() + for subdir, dirs, files in os.walk(topdir, onerror=walk_error): - dirs[:] = [d for d in dirs if d not in ignore_dirs] + dirs[:] = [directory for directory in dirs if directory not in ignore_dirs] if subdir == str(topdir): - dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs] + dirs[:] = [ + directory + for directory in dirs + if directory not in ignore_top_level_dirs + ] dirs.sort() files.sort() @@ -177,14 +188,19 @@ def add_package_files( continue filename = str(filepath.relative_to(topdir)) + + if exclude_patterns and any( + pattern.search(filename) for pattern in exclude_patterns + ): + excluded_files.add(filename) + continue + file_purposes = get_purposes(filepath) - # Check if file is compiled - if check_compiled_sources: - if not oe.spdx_common.is_compiled_source( - filename, compiled_sources, types - ): - continue + if check_compiled_sources and not oe.spdx_common.is_compiled_source( + filename, compiled_sources, types + ): + continue spdx_file = objset.new_file( get_spdxid(file_counter), @@ -218,12 +234,15 @@ def add_package_files( bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id)) - return spdx_files + return spdx_files, excluded_files def get_package_sources_from_debug( - d, package, package_files, sources, source_hash_cache + d, package, package_files, sources, source_hash_cache, excluded_files=None ): + if excluded_files is None: + excluded_files = set() + def file_path_match(file_path, pkg_file): if file_path.lstrip("/") == pkg_file.name.lstrip("/"): return True @@ -256,6 +275,12 @@ def get_package_sources_from_debug( continue if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files): + if file_path.lstrip("/") in excluded_files: + bb.debug( + 1, + f"Skipping debug source lookup for excluded file {file_path} in {package}", + ) + continue bb.fatal( "No package file found for %s in %s; SPDX found: %s" % (str(file_path), package, " ".join(p.name for p in package_files)) @@ -737,7 +762,7 @@ def create_spdx(d): bb.debug(1, "Adding source files to SPDX") oe.spdx_common.get_patched_src(d) - files = add_package_files( + files, _ = add_package_files( d, build_objset, spdx_workdir, @@ -909,7 +934,7 @@ def create_spdx(d): ) bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name) - package_files = add_package_files( + package_files, excluded_files = add_package_files( d, pkg_objset, pkgdest / package, @@ -932,7 +957,8 @@ def create_spdx(d): if include_sources: debug_sources = get_package_sources_from_debug( - d, package, package_files, dep_sources, source_hash_cache + d, package, package_files, dep_sources, source_hash_cache, + excluded_files=excluded_files, ) debug_source_ids |= set( oe.sbom30.get_element_link_id(d) for d in debug_sources @@ -944,7 +970,7 @@ def create_spdx(d): if include_sources: bb.debug(1, "Adding sysroot files to SPDX") - sysroot_files = add_package_files( + sysroot_files, _ = add_package_files( d, build_objset, d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"), @@ -1326,18 +1352,18 @@ def create_image_spdx(d): image_filename = image["filename"] image_path = image_deploy_dir / image_filename if os.path.isdir(image_path): - a = add_package_files( - d, - objset, - image_path, - lambda file_counter: objset.new_spdxid( - "imagefile", str(file_counter) - ), - lambda filepath: [], - license_data=None, - ignore_dirs=[], - ignore_top_level_dirs=[], - archive=None, + a, _ = add_package_files( + d, + objset, + image_path, + lambda file_counter: objset.new_spdxid( + "imagefile", str(file_counter) + ), + lambda filepath: [], + license_data=None, + ignore_dirs=[], + ignore_top_level_dirs=[], + archive=None, ) artifacts.extend(a) else: