diff mbox series

[v5,2/3] spdx: add option to include only compiled sources

Message ID 20250521134400.1733473-3-daniel.turull@ericsson.com
State New
Headers show
Series Check compiled files to filter kernel CVEs | expand

Commit Message

Daniel Turull May 21, 2025, 1:43 p.m. UTC
From: Daniel Turull <daniel.turull@ericsson.com>

When SPDX_INCLUDE_COMPILED_SOURCES is enabled, only include the
source code files that are used during compilation.

It uses debugsource information generated during do_package.

This enables an external tool to use the SPDX information to disregard
vulnerabilities in source code that is not compiled.

As an example, when used with the default linux-yocto config, the SPDX size is
reduced from 156MB to 61MB.

CC: Quentin Schulz <quentin.schulz@cherry.de>
CC: Joshua Watt <JPEWhacker@gmail.com>
CC: Peter Marko <peter.marko@siemens.com>
Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
---
 meta/classes/create-spdx-2.2.bbclass |  9 +++++
 meta/classes/spdx-common.bbclass     |  3 ++
 meta/lib/oe/spdx30_tasks.py          | 10 ++++++
 meta/lib/oe/spdx_common.py           | 49 ++++++++++++++++++++++++++++
 4 files changed, 71 insertions(+)
diff mbox series

Patch

diff --git a/meta/classes/create-spdx-2.2.bbclass b/meta/classes/create-spdx-2.2.bbclass
index 7e8f8b9ff5..6fc60a1d97 100644
--- a/meta/classes/create-spdx-2.2.bbclass
+++ b/meta/classes/create-spdx-2.2.bbclass
@@ -137,6 +137,11 @@  def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
     spdx_files = []
 
     file_counter = 1
+
+    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
+    if check_compiled_sources:
+        compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
+        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
     for subdir, dirs, files in os.walk(topdir):
         dirs[:] = [d for d in dirs if d not in ignore_dirs]
         if subdir == str(topdir):
@@ -147,6 +152,10 @@  def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
             filename = str(filepath.relative_to(topdir))
 
             if not filepath.is_symlink() and filepath.is_file():
+                # Check if file is compiled
+                if check_compiled_sources:
+                     if not oe.spdx_common.is_compiled_source(filename, compiled_sources, types):
+                          continue
                 spdx_file = oe.spdx.SPDXFile()
                 spdx_file.SPDXID = get_spdxid(file_counter)
                 for t in get_types(filepath):
diff --git a/meta/classes/spdx-common.bbclass b/meta/classes/spdx-common.bbclass
index 713a7fc651..ca0416d1c7 100644
--- a/meta/classes/spdx-common.bbclass
+++ b/meta/classes/spdx-common.bbclass
@@ -26,6 +26,7 @@  SPDX_TOOL_VERSION ??= "1.0"
 SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy"
 
 SPDX_INCLUDE_SOURCES ??= "0"
+SPDX_INCLUDE_COMPILED_SOURCES ??= "0"
 
 SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org"
 SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdocs"
@@ -40,6 +41,8 @@  SPDX_MULTILIB_SSTATE_ARCHS ??= "${SSTATE_ARCHS}"
 python () {
     from oe.cve_check import extend_cve_status
     extend_cve_status(d)
+    if d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1":
+        d.setVar("SPDX_INCLUDE_SOURCES", "1")
 }
 
 def create_spdx_source_deps(d):
diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
index 61d7ba45e3..beeafc2bb7 100644
--- a/meta/lib/oe/spdx30_tasks.py
+++ b/meta/lib/oe/spdx30_tasks.py
@@ -156,6 +156,11 @@  def add_package_files(
         bb.note(f"Skip {topdir}")
         return spdx_files
 
+    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
+    if check_compiled_sources:
+        compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
+        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
+
     for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
         dirs[:] = [d for d in dirs if d not in ignore_dirs]
         if subdir == str(topdir):
@@ -171,6 +176,11 @@  def add_package_files(
             filename = str(filepath.relative_to(topdir))
             file_purposes = get_purposes(filepath)
 
+            # Check if file is compiled
+            if check_compiled_sources:
+                if not oe.spdx_common.is_compiled_source(filename, compiled_sources, types):
+                    continue
+
             spdx_file = objset.new_file(
                 get_spdxid(file_counter),
                 filename,
diff --git a/meta/lib/oe/spdx_common.py b/meta/lib/oe/spdx_common.py
index 4caefc7673..e4959fb755 100644
--- a/meta/lib/oe/spdx_common.py
+++ b/meta/lib/oe/spdx_common.py
@@ -242,3 +242,52 @@  def fetch_data_to_uri(fd, name):
         uri = uri + "@" + fd.revision
 
     return uri
+
+def is_compiled_source(filename, compiled_sources, types):
+    """
+    Check if the file is a compiled file. Files are always kept when there is
+    no compiled source list or when their extension is not in types.
+    """
+    import os
+    # If we don't have compiled sources, we assume all are compiled.
+    if not compiled_sources:
+        return True
+    basename = os.path.basename(filename)
+    dot = basename.find(".")
+    # No "." means no extension: find() == -1 must not be used as a slice start
+    if dot == -1 or basename[dot:] not in types:
+        return True
+    # Strip the top directory to match the format in compiled_sources
+    return filename[filename.find("/") + 1:] in compiled_sources
+
+def get_compiled_sources(d):
+    """
+    Get list of compiled sources from debug information and normalize the
+    paths. Returns (sources, types); both are empty if there is no list file.
+    """
+    import os
+    sourcefile = d.expand("${PKGDESTWORK}/debugsources/${PN}-debugsources.list")
+    pn = d.getVar('PN')
+    pv = d.getVar('PV')
+    if not os.path.isfile(sourcefile):
+        bb.debug(1, "Do not have debugsources.list. Skipping")
+        return [], []
+    # Filenames are null-separated (an artefact of the previous use of rpm's
+    # debugedit), so read the whole file rather than a single line.
+    with open(sourcefile, 'r') as sf:
+        data = sf.read()
+    # Normalize the paths to match the ones in the package; the kernel is a
+    # special case that doesn't match pn.
+    for prefix in (f"/usr/src/debug/{pn}/", "/usr/src/kernel/", f"/usr/src/{pn}/", f"{pv}/"):
+        data = data.replace(prefix, "")
+    # Drop empty entries such as the one left by a trailing separator
+    sources = [src for src in data.split('\0') if src]
+    # Check extensions of files; names without a "." have none (find() == -1)
+    types = []
+    for src in sources:
+        basename = os.path.basename(src)
+        dot = basename.find(".")
+        if dot != -1 and basename[dot:] not in types:
+            types.append(basename[dot:])
+    bb.debug(1, f"Num of sources: {len(sources)} and types: {len(types)} {str(types)}")
+    return sources, types