diff mbox series

[scarthgap,7/9] spdx: add option to include only compiled sources

Message ID 5396e9b81e88ac2b1141c7f5dd7a7e6ba4fc73f9.1751466215.git.steve@sakoman.com
State RFC
Delegated to: Steve Sakoman
Headers show
Series [scarthgap,1/9] go: fix CVE-2025-4673 | expand

Commit Message

Steve Sakoman July 2, 2025, 2:25 p.m. UTC
From: Daniel Turull <daniel.turull@ericsson.com>

When SPDX_INCLUDE_COMPILED_SOURCES is enabled, only include the
source code files that are used during compilation.

It uses the debugsources information generated during do_package.

This enables an external tool to use the SPDX information to disregard
vulnerabilities in source code that is not compiled.

As an example, when used with the default config with linux-yocto, the SPDX size
is reduced from 156MB to 61MB.

Tested with bitbake world on oe-core.

(From OE-Core rev: c6a2f1fca76fae4c3ea471a0c63d0b453beea968)
Adapted to existing files for create-spdx-2.2

CC: Mathieu Dubois-Briand <mathieu.dubois-briand@bootlin.com>
CC: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
Signed-off-by: Steve Sakoman <steve@sakoman.com>
---
 meta/classes/create-spdx-2.2.bbclass | 12 ++++++++
 meta/lib/oe/spdx.py                  | 42 ++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
diff mbox series

Patch

diff --git a/meta/classes/create-spdx-2.2.bbclass b/meta/classes/create-spdx-2.2.bbclass
index ade1a04be3..1fc11ad7ac 100644
--- a/meta/classes/create-spdx-2.2.bbclass
+++ b/meta/classes/create-spdx-2.2.bbclass
@@ -100,6 +100,9 @@  python() {
         # Transform the license array to a dictionary
         data["licenses"] = {l["licenseId"]: l for l in data["licenses"]}
         d.setVar("SPDX_LICENSE_DATA", data)
+
+    if d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1":
+        d.setVar("SPDX_INCLUDE_SOURCES", "1")
 }
 
 def convert_license_to_spdx(lic, document, d, existing={}):
@@ -215,6 +218,11 @@  def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
     spdx_files = []
 
     file_counter = 1
+
+    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
+    if check_compiled_sources:
+        compiled_sources, types = oe.spdx.get_compiled_sources(d)
+        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
     for subdir, dirs, files in os.walk(topdir):
         dirs[:] = [d for d in dirs if d not in ignore_dirs]
         if subdir == str(topdir):
@@ -225,6 +233,10 @@  def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
             filename = str(filepath.relative_to(topdir))
 
             if not filepath.is_symlink() and filepath.is_file():
+                # Check if file is compiled
+                if check_compiled_sources:
+                     if not oe.spdx.is_compiled_source(filename, compiled_sources, types):
+                          continue
                 spdx_file = oe.spdx.SPDXFile()
                 spdx_file.SPDXID = get_spdxid(file_counter)
                 for t in get_types(filepath):
diff --git a/meta/lib/oe/spdx.py b/meta/lib/oe/spdx.py
index 7aaf2af5ed..92dcd2da05 100644
--- a/meta/lib/oe/spdx.py
+++ b/meta/lib/oe/spdx.py
@@ -355,3 +355,45 @@  class SPDXDocument(SPDXObject):
             if r.spdxDocument == namespace:
                 return r
         return None
+
+def is_compiled_source (filename, compiled_sources, types):
+    """
+    Check if the file is a compiled file
+    """
+    import os
+    # If we don't have compiled source, we assume all are compiled.
+    if not compiled_sources:
+        return True
+
+    # We return always true if the file type is not in the list of compiled files.
+    # Some files in the source directory are not compiled, for example, Makefiles,
+    # but also python .py file. We need to include them in the SPDX.
+    basename = os.path.basename(filename)
+    ext = basename.partition(".")[2]
+    if ext not in types:
+        return True
+    # Check that the file is in the list
+    return filename in compiled_sources
+
+def get_compiled_sources(d):
+    """
+    Get list of compiled sources from debug information and normalize the paths
+    """
+    import itertools
+    import oe.package
+    source_info = oe.package.read_debugsources_info(d)
+    if not source_info:
+        bb.debug(1, "Do not have debugsources.list. Skipping")
+        return [], []
+
+    # Sources are not split now in SPDX, so we aggregate them
+    sources = set(itertools.chain.from_iterable(source_info.values()))
+    # Check extensions of files
+    types = set()
+    for src in sources:
+        basename = os.path.basename(src)
+        ext = basename.partition(".")[2]
+        if ext not in types and ext:
+            types.add(ext)
+    bb.debug(1, f"Num of sources: {len(sources)} and types: {len(types)} {str(types)}")
+    return sources, types