diff mbox series

[v4,1/3] spdx: add option to include only compiled sources

Message ID 20250514131146.501451-2-daniel.turull@ericsson.com
State New
Headers show
Series Check compiled files to filter kernel CVEs | expand

Commit Message

Daniel Turull May 14, 2025, 1:11 p.m. UTC
From: Daniel Turull <daniel.turull@ericsson.com>

When SPDX_INCLUDE_COMPILED_SOURCES is enabled, only include the
source code (.c) files that are used during compilation.

This enables an external tool to use the SPDX information to disregard
vulnerabilities in source files that are not compiled.

This commit adds the basics, so recipes can implement their own methods.

CC: Joshua Watt <JPEWhacker@gmail.com>
CC: Peter Marko <peter.marko@siemens.com>
Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
---
 meta/classes/create-spdx-2.2.bbclass |  9 ++++++++
 meta/classes/spdx-common.bbclass     |  3 +++
 meta/lib/oe/spdx30_tasks.py          |  9 ++++++++
 meta/lib/oe/spdx_common.py           | 33 ++++++++++++++++++++++++++++
 4 files changed, 54 insertions(+)
diff mbox series

Patch

diff --git a/meta/classes/create-spdx-2.2.bbclass b/meta/classes/create-spdx-2.2.bbclass
index 7e8f8b9ff5..dd8ee6ecbe 100644
--- a/meta/classes/create-spdx-2.2.bbclass
+++ b/meta/classes/create-spdx-2.2.bbclass
@@ -137,6 +137,11 @@  def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
     spdx_files = []
 
     file_counter = 1
+
+    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
+    if check_compiled_sources:
+        compiled_sources = oe.spdx_common.get_compiled_sources(d)
+        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
     for subdir, dirs, files in os.walk(topdir):
         dirs[:] = [d for d in dirs if d not in ignore_dirs]
         if subdir == str(topdir):
@@ -147,6 +152,10 @@  def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
             filename = str(filepath.relative_to(topdir))
 
             if not filepath.is_symlink() and filepath.is_file():
+                # Skip only this file if it is a source that was not compiled
+                if check_compiled_sources:
+                    if not oe.spdx_common.is_compiled_source(file, compiled_sources):
+                        continue
                 spdx_file = oe.spdx.SPDXFile()
                 spdx_file.SPDXID = get_spdxid(file_counter)
                 for t in get_types(filepath):
diff --git a/meta/classes/spdx-common.bbclass b/meta/classes/spdx-common.bbclass
index 713a7fc651..e9dde34513 100644
--- a/meta/classes/spdx-common.bbclass
+++ b/meta/classes/spdx-common.bbclass
@@ -26,6 +26,9 @@  SPDX_TOOL_VERSION ??= "1.0"
 SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy"
 
 SPDX_INCLUDE_SOURCES ??= "0"
+SPDX_INCLUDE_COMPILED_SOURCES ??= "0"
+SPDX_COMPILED_SOURCES_DIR ??= "${LOG_DIR}/spdx-compiled/${PN}"
+SPDX_COMPILED_SOURCES ??= "${SPDX_FILES_DIR}/compiled_src-${BP}.txt"
 
 SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org"
 SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdocs"
diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
index 61d7ba45e3..083e004330 100644
--- a/meta/lib/oe/spdx30_tasks.py
+++ b/meta/lib/oe/spdx30_tasks.py
@@ -156,6 +156,11 @@  def add_package_files(
         bb.note(f"Skip {topdir}")
         return spdx_files
 
+    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
+    if check_compiled_sources:
+        compiled_sources = oe.spdx_common.get_compiled_sources(d)
+        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
+
     for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
         dirs[:] = [d for d in dirs if d not in ignore_dirs]
         if subdir == str(topdir):
@@ -167,6 +172,10 @@  def add_package_files(
             filepath = Path(subdir) / file
             if filepath.is_symlink() or not filepath.is_file():
                 continue
+            # Skip only this file if it is a source that was not compiled
+            if check_compiled_sources:
+                if not oe.spdx_common.is_compiled_source(file, compiled_sources):
+                    continue
 
             filename = str(filepath.relative_to(topdir))
             file_purposes = get_purposes(filepath)
diff --git a/meta/lib/oe/spdx_common.py b/meta/lib/oe/spdx_common.py
index 4caefc7673..e1b7f576dd 100644
--- a/meta/lib/oe/spdx_common.py
+++ b/meta/lib/oe/spdx_common.py
@@ -242,3 +242,36 @@  def fetch_data_to_uri(fd, name):
         uri = uri + "@" + fd.revision
 
     return uri
+
+
+def is_compiled_source(filename, compiled_sources):
+    """
+    Return True if filename must be kept when filtering on compiled sources.
+    Only ".c" files are filtered; they are matched verbatim against the list.
+    """
+    import os
+    # Without a list of compiled sources, assume everything is compiled.
+    if not compiled_sources:
+        return True
+    # Anything that is not a .c file is never filtered out.
+    if os.path.splitext(filename)[1] != ".c":
+        return True
+    # "*.mod.c" files are a special case: not treated as sources, always kept.
+    if filename.endswith(".mod.c"):
+        return True
+    # A regular .c file is kept only when it appears in the list.
+    return filename in compiled_sources
+
+def get_compiled_sources(d):
+    """
+    Return the list of compiled source files, one per non-blank line of the
+    file named by SPDX_COMPILED_SOURCES; empty if it is unset or not a file.
+    """
+    import os
+    sources = d.getVar('SPDX_COMPILED_SOURCES')
+    if not sources or not os.path.isfile(sources):
+        return []
+    with open(sources, 'r') as f:
+        # Skip blank lines: an empty entry would make an otherwise empty
+        # list look populated to callers that test its length.
+        return [entry for entry in map(str.strip, f) if entry]