diff mbox series

[v6,2/3] spdx: add option to include only compiled sources

Message ID 20250604112133.2581063-3-daniel.turull@ericsson.com
State Superseded
Headers show
Series Check compiled files to filter kernel CVEs | expand

Commit Message

Daniel Turull June 4, 2025, 11:21 a.m. UTC
From: Daniel Turull <daniel.turull@ericsson.com>

When SPDX_INCLUDE_COMPILED_SOURCES is enabled, only include the
source code files that are used during compilation.

It uses debugsource information generated during do_package.

This enables an external tool to use the SPDX information to disregard
vulnerabilities that are not compiled.

As an example, when used with the default linux-yocto config, the SPDX size is
reduced from 156MB to 61MB.

Tested with bitbake world on oe-core.

CC: Quentin Schulz <quentin.schulz@cherry.de>
CC: Joshua Watt <JPEWhacker@gmail.com>
CC: Peter Marko <peter.marko@siemens.com>
Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
---
 meta/classes/create-spdx-2.2.bbclass |  9 ++++++
 meta/classes/spdx-common.bbclass     |  3 ++
 meta/lib/oe/spdx30_tasks.py          | 10 +++++++
 meta/lib/oe/spdx_common.py           | 41 ++++++++++++++++++++++++++++
 4 files changed, 63 insertions(+)

Comments

Joshua Watt June 5, 2025, 3:14 p.m. UTC | #1
On Wed, Jun 4, 2025 at 5:21 AM <daniel.turull@ericsson.com> wrote:
>
> From: Daniel Turull <daniel.turull@ericsson.com>
>
> When SPDX_INCLUDE_COMPILED_SOURCES is enabled, only include the
> source code files that are used during compilation.
>
> It uses debugsource information generated during do_package.
>
> This enables an external tool to use the SPDX information to disregard
> vulnerabilities that are not compiled.
>
> As example, when used with the default config with linux-yocto, the spdx size is
> reduced from 156MB to 61MB.
>
> Tested with bitbake world on oe-core.
>
> CC: Quentin Schulz <quentin.schulz@cherry.de>
> CC: Joshua Watt <JPEWhacker@gmail.com>
> CC: Peter Marko <peter.marko@siemens.com>
> Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
> ---
>  meta/classes/create-spdx-2.2.bbclass |  9 ++++++
>  meta/classes/spdx-common.bbclass     |  3 ++
>  meta/lib/oe/spdx30_tasks.py          | 10 +++++++
>  meta/lib/oe/spdx_common.py           | 41 ++++++++++++++++++++++++++++
>  4 files changed, 63 insertions(+)
>
> diff --git a/meta/classes/create-spdx-2.2.bbclass b/meta/classes/create-spdx-2.2.bbclass
> index 7e8f8b9ff5..6fc60a1d97 100644
> --- a/meta/classes/create-spdx-2.2.bbclass
> +++ b/meta/classes/create-spdx-2.2.bbclass
> @@ -137,6 +137,11 @@ def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
>      spdx_files = []
>
>      file_counter = 1
> +
> +    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
> +    if check_compiled_sources:
> +        compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
> +        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
>      for subdir, dirs, files in os.walk(topdir):
>          dirs[:] = [d for d in dirs if d not in ignore_dirs]
>          if subdir == str(topdir):
> @@ -147,6 +152,10 @@ def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
>              filename = str(filepath.relative_to(topdir))
>
>              if not filepath.is_symlink() and filepath.is_file():
> +                # Check if file is compiled
> +                if check_compiled_sources:
> +                     if not oe.spdx_common.is_compiled_source(filename, compiled_sources, types):
> +                          continue
>                  spdx_file = oe.spdx.SPDXFile()
>                  spdx_file.SPDXID = get_spdxid(file_counter)
>                  for t in get_types(filepath):
> diff --git a/meta/classes/spdx-common.bbclass b/meta/classes/spdx-common.bbclass
> index 713a7fc651..ca0416d1c7 100644
> --- a/meta/classes/spdx-common.bbclass
> +++ b/meta/classes/spdx-common.bbclass
> @@ -26,6 +26,7 @@ SPDX_TOOL_VERSION ??= "1.0"
>  SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy"
>
>  SPDX_INCLUDE_SOURCES ??= "0"
> +SPDX_INCLUDE_COMPILED_SOURCES ??= "0"
>
>  SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org"
>  SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdocs"
> @@ -40,6 +41,8 @@ SPDX_MULTILIB_SSTATE_ARCHS ??= "${SSTATE_ARCHS}"
>  python () {
>      from oe.cve_check import extend_cve_status
>      extend_cve_status(d)
> +    if d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1":
> +        d.setVar("SPDX_INCLUDE_SOURCES", "1")
>  }
>
>  def create_spdx_source_deps(d):
> diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> index 61d7ba45e3..beeafc2bb7 100644
> --- a/meta/lib/oe/spdx30_tasks.py
> +++ b/meta/lib/oe/spdx30_tasks.py
> @@ -156,6 +156,11 @@ def add_package_files(
>          bb.note(f"Skip {topdir}")
>          return spdx_files
>
> +    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
> +    if check_compiled_sources:
> +        compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
> +        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
> +
>      for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
>          dirs[:] = [d for d in dirs if d not in ignore_dirs]
>          if subdir == str(topdir):
> @@ -171,6 +176,11 @@ def add_package_files(
>              filename = str(filepath.relative_to(topdir))
>              file_purposes = get_purposes(filepath)
>
> +            # Check if file is compiled
> +            if check_compiled_sources:
> +                if not oe.spdx_common.is_compiled_source(filename, compiled_sources, types):
> +                    continue
> +
>              spdx_file = objset.new_file(
>                  get_spdxid(file_counter),
>                  filename,
> diff --git a/meta/lib/oe/spdx_common.py b/meta/lib/oe/spdx_common.py
> index 4caefc7673..daf43bce56 100644
> --- a/meta/lib/oe/spdx_common.py
> +++ b/meta/lib/oe/spdx_common.py
> @@ -242,3 +242,44 @@ def fetch_data_to_uri(fd, name):
>          uri = uri + "@" + fd.revision
>
>      return uri
> +
> +def is_compiled_source (filename, compiled_sources, types):
> +    """
> +    Check if the file is a compiled file
> +    """
> +    import os
> +    # If we don't have compiled source, we assume all are compiled.
> +    if len(compiled_sources) == 0:

idiomatically, this would be:

   if not compiled_sources:
     return True

> +        return True
> +    # We remove the top directory, to match the format in compiled sources

This feels a little bit magic; can you explain why removing the top
directory is necessary and/or correct to always perform?

> +    relative = filename[filename.find("/")+1:]

you use .find() a lot, but I think split() is more idiomatic:

  _, relative = filename.split("/", 1)

> +    basename = os.path.basename(filename)
> +    # We return always true if the file type is not in the list of compiled files

Why is this?

> +    if basename[basename.find("."):] not in types:

   if "." not in basename or basename.split(".", 1)[1] not in types:
      return True

> +        return True
> +    # Check that the file is in the list
> +    return relative in compiled_sources
> +
> +def get_compiled_sources(d):
> +    """
> +    Get list of compiled sources from debug information and normalize the paths
> +    """
> +    import itertools
> +    source_info = oe.package.read_debugsources_file(d)
> +    if not source_info:
> +        bb.debug(1, "Do not have debugsources.list. Skipping")
> +        return [], []
> +
> +    # Sources are not split now in SPDX, so we aggregate them
> +    sources = list(set(itertools.chain.from_iterable(source_info.values())))

sources should be a set, since you are using the "in" operator, which
is much more efficient for sets than lists (especially when they are
large)

> +    # Check extensions of files
> +    types = []
> +    for src in sources:
> +        basename = os.path.basename(src)
> +        # We check that the basename has an extension
> +        if basename.find(".") > 0:
> +            ext = basename[basename.find("."):]

Similarly:

  if "." in basename:
    stem, ext = basename.split(".", 1)

> +            if ext not in types and len(ext)>0:
> +                types.append(ext)

Making types a set would be more efficient, e.g.

   types = set()
   ...

   if ext:
      types.add(ext)

The "in" operator is much more efficient for sets than lists

> +    bb.debug(1, f"Num of sources: {len(sources)} and types: {len(types)} {str(types)}")
> +    return sources, types
Daniel Turull June 9, 2025, 8:34 a.m. UTC | #2
Thanks Joshua for the comments. I'll resend the patch with the fixes after I have verified that it works with a world build.

> -----Original Message-----
> From: Joshua Watt <jpewhacker@gmail.com>
> Sent: Thursday, 5 June 2025 17:15
> To: Daniel Turull <daniel.turull@ericsson.com>
> Cc: openembedded-core@lists.openembedded.org; Quentin Schulz
> <quentin.schulz@cherry.de>; Peter Marko <peter.marko@siemens.com>
> Subject: Re: [PATCH v6 2/3] spdx: add option to include only compiled sources
>
> On Wed, Jun 4, 2025 at 5:21 AM <daniel.turull@ericsson.com> wrote:
> >
> > From: Daniel Turull <daniel.turull@ericsson.com>
> >
> > When SPDX_INCLUDE_COMPILED_SOURCES is enabled, only include the
> source
> > code files that are used during compilation.
> >
> > It uses debugsource information generated during do_package.
> >
> > This enables an external tool to use the SPDX information to disregard
> > vulnerabilities that are not compiled.
> >
> > As example, when used with the default config with linux-yocto, the
> > spdx size is reduced from 156MB to 61MB.
> >
> > Tested with bitbake world on oe-core.
> >
> > CC: Quentin Schulz <quentin.schulz@cherry.de>
> > CC: Joshua Watt <JPEWhacker@gmail.com>
> > CC: Peter Marko <peter.marko@siemens.com>
> > Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
> > ---
> >  meta/classes/create-spdx-2.2.bbclass |  9 ++++++
> >  meta/classes/spdx-common.bbclass     |  3 ++
> >  meta/lib/oe/spdx30_tasks.py          | 10 +++++++
> >  meta/lib/oe/spdx_common.py           | 41 ++++++++++++++++++++++++++++
> >  4 files changed, 63 insertions(+)
> >
> > diff --git a/meta/classes/create-spdx-2.2.bbclass
> > b/meta/classes/create-spdx-2.2.bbclass
> > index 7e8f8b9ff5..6fc60a1d97 100644
> > --- a/meta/classes/create-spdx-2.2.bbclass
> > +++ b/meta/classes/create-spdx-2.2.bbclass
> > @@ -137,6 +137,11 @@ def add_package_files(d, doc, spdx_pkg, topdir,
> get_spdxid, get_types, *, archiv
> >      spdx_files = []
> >
> >      file_counter = 1
> > +
> > +    check_compiled_sources =
> d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
> > +    if check_compiled_sources:
> > +        compiled_sources, types =
> oe.spdx_common.get_compiled_sources(d)
> > +        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
> >      for subdir, dirs, files in os.walk(topdir):
> >          dirs[:] = [d for d in dirs if d not in ignore_dirs]
> >          if subdir == str(topdir):
> > @@ -147,6 +152,10 @@ def add_package_files(d, doc, spdx_pkg, topdir,
> get_spdxid, get_types, *, archiv
> >              filename = str(filepath.relative_to(topdir))
> >
> >              if not filepath.is_symlink() and filepath.is_file():
> > +                # Check if file is compiled
> > +                if check_compiled_sources:
> > +                     if not oe.spdx_common.is_compiled_source(filename,
> compiled_sources, types):
> > +                          continue
> >                  spdx_file = oe.spdx.SPDXFile()
> >                  spdx_file.SPDXID = get_spdxid(file_counter)
> >                  for t in get_types(filepath):
> > diff --git a/meta/classes/spdx-common.bbclass
> > b/meta/classes/spdx-common.bbclass
> > index 713a7fc651..ca0416d1c7 100644
> > --- a/meta/classes/spdx-common.bbclass
> > +++ b/meta/classes/spdx-common.bbclass
> > @@ -26,6 +26,7 @@ SPDX_TOOL_VERSION ??= "1.0"
> >  SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy"
> >
> >  SPDX_INCLUDE_SOURCES ??= "0"
> > +SPDX_INCLUDE_COMPILED_SOURCES ??= "0"
> >
> >  SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org"
> >  SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdocs"
> > @@ -40,6 +41,8 @@ SPDX_MULTILIB_SSTATE_ARCHS ??=
> "${SSTATE_ARCHS}"
> >  python () {
> >      from oe.cve_check import extend_cve_status
> >      extend_cve_status(d)
> > +    if d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1":
> > +        d.setVar("SPDX_INCLUDE_SOURCES", "1")
> >  }
> >
> >  def create_spdx_source_deps(d):
> > diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> > index 61d7ba45e3..beeafc2bb7 100644
> > --- a/meta/lib/oe/spdx30_tasks.py
> > +++ b/meta/lib/oe/spdx30_tasks.py
> > @@ -156,6 +156,11 @@ def add_package_files(
> >          bb.note(f"Skip {topdir}")
> >          return spdx_files
> >
> > +    check_compiled_sources =
> d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
> > +    if check_compiled_sources:
> > +        compiled_sources, types =
> oe.spdx_common.get_compiled_sources(d)
> > +        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
> > +
> >      for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
> >          dirs[:] = [d for d in dirs if d not in ignore_dirs]
> >          if subdir == str(topdir):
> > @@ -171,6 +176,11 @@ def add_package_files(
> >              filename = str(filepath.relative_to(topdir))
> >              file_purposes = get_purposes(filepath)
> >
> > +            # Check if file is compiled
> > +            if check_compiled_sources:
> > +                if not oe.spdx_common.is_compiled_source(filename,
> compiled_sources, types):
> > +                    continue
> > +
> >              spdx_file = objset.new_file(
> >                  get_spdxid(file_counter),
> >                  filename,
> > diff --git a/meta/lib/oe/spdx_common.py b/meta/lib/oe/spdx_common.py
> > index 4caefc7673..daf43bce56 100644
> > --- a/meta/lib/oe/spdx_common.py
> > +++ b/meta/lib/oe/spdx_common.py
> > @@ -242,3 +242,44 @@ def fetch_data_to_uri(fd, name):
> >          uri = uri + "@" + fd.revision
> >
> >      return uri
> > +
> > +def is_compiled_source (filename, compiled_sources, types):
> > +    """
> > +    Check if the file is a compiled file
> > +    """
> > +    import os
> > +    # If we don't have compiled source, we assume all are compiled.
> > +    if len(compiled_sources) == 0:
>
> idiomatically, this would be:
>
>    if not compiled_sources:
>      return True

Thanks. I'll change it.

> > +        return True
> > +    # We remove the top directory, to match the format in compiled
> > + sources
>
> This feels a little bit magic, can you explain why removing the top directory is
> necessary and or correct to always perform?

In the data that we extract during do_package, the files can be in a different location (mostly for the kernel),
so I have normalized the paths by removing the top directory.

For example for linux-yocto, if I take tcp_ipv4.c
SPDX path:
linux-yocto-6.12.30+git/net/ipv4/tcp_ipv4.c
Packaging path from debugsources without normalization:
"/usr/src/kernel/net/ipv4/tcp_ipv4.c",
Packaging with normalization: net/ipv4/tcp_ipv4.c

For ncurses:
SPDX: git/ncurses/tinfo/comp_scan.c
Packaging: "/usr/src/debug/ncurses/git/ncurses/tinfo/comp_scan.c"
Packaging with normalization:  ncurses/tinfo/comp_scan.c


> > +    relative = filename[filename.find("/")+1:]
>
> you use .find() a lot, but I think split() is more idiomatic:
>
>   _, relative = filename.split("/", 1)

I'll change it for this one.

> > +    basename = os.path.basename(filename)
> > +    # We return always true if the file type is not in the list of
> > + compiled files
>
> Why is this?

For example, Makefiles, Python files, or config files are not actually compiled, but they should be included since they are part of the build.

> > +    if basename[basename.find("."):] not in types:
>
>    if "." not in basename or basename.split(".", 1)[1] not in types:
>       return True
>
> > +        return True
> > +    # Check that the file is in the list
> > +    return relative in compiled_sources
> > +
> > +def get_compiled_sources(d):
> > +    """
> > +    Get list of compiled sources from debug information and normalize the
> paths
> > +    """
> > +    import itertools
> > +    source_info = oe.package.read_debugsources_file(d)
> > +    if not source_info:
> > +        bb.debug(1, "Do not have debugsources.list. Skipping")
> > +        return [], []
> > +
> > +    # Sources are not split now in SPDX, so we aggregate them
> > +    sources =
> > + list(set(itertools.chain.from_iterable(source_info.values())))
>
> sources should be a set, since you are using the "in" operator, which is much
> more efficient for sets than lists (especially when they are
> large)

I'll change it. That's a good tip.

>
> > +    # Check extensions of files
> > +    types = []
> > +    for src in sources:
> > +        basename = os.path.basename(src)
> > +        # We check that the basename has an extension
> > +        if basename.find(".") > 0:
> > +            ext = basename[basename.find("."):]
>
> Similarly:
>
>   if "." in basename:
>     stem, ext = basename.split(".", 1)
>
> > +            if ext not in types and len(ext)>0:
> > +                types.append(ext)
>
> Making types a set would be more efficient, e.g.

Thanks for the tip. I'll replace the lists with sets.

>    types = set()
>    ...
>
>    if ext:
>       types.add(ext)
>
> The "in" operator is much more efficient for sets than lists
>
> > +    bb.debug(1, f"Num of sources: {len(sources)} and types: {len(types)}
> {str(types)}")
> > +    return sources, types
diff mbox series

Patch

diff --git a/meta/classes/create-spdx-2.2.bbclass b/meta/classes/create-spdx-2.2.bbclass
index 7e8f8b9ff5..6fc60a1d97 100644
--- a/meta/classes/create-spdx-2.2.bbclass
+++ b/meta/classes/create-spdx-2.2.bbclass
@@ -137,6 +137,11 @@  def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
     spdx_files = []
 
     file_counter = 1
+
+    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
+    if check_compiled_sources:
+        compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
+        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
     for subdir, dirs, files in os.walk(topdir):
         dirs[:] = [d for d in dirs if d not in ignore_dirs]
         if subdir == str(topdir):
@@ -147,6 +152,10 @@  def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
             filename = str(filepath.relative_to(topdir))
 
             if not filepath.is_symlink() and filepath.is_file():
+                # Check if file is compiled
+                if check_compiled_sources:
+                     if not oe.spdx_common.is_compiled_source(filename, compiled_sources, types):
+                          continue
                 spdx_file = oe.spdx.SPDXFile()
                 spdx_file.SPDXID = get_spdxid(file_counter)
                 for t in get_types(filepath):
diff --git a/meta/classes/spdx-common.bbclass b/meta/classes/spdx-common.bbclass
index 713a7fc651..ca0416d1c7 100644
--- a/meta/classes/spdx-common.bbclass
+++ b/meta/classes/spdx-common.bbclass
@@ -26,6 +26,7 @@  SPDX_TOOL_VERSION ??= "1.0"
 SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy"
 
 SPDX_INCLUDE_SOURCES ??= "0"
+SPDX_INCLUDE_COMPILED_SOURCES ??= "0"
 
 SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org"
 SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdocs"
@@ -40,6 +41,8 @@  SPDX_MULTILIB_SSTATE_ARCHS ??= "${SSTATE_ARCHS}"
 python () {
     from oe.cve_check import extend_cve_status
     extend_cve_status(d)
+    if d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1":
+        d.setVar("SPDX_INCLUDE_SOURCES", "1")
 }
 
 def create_spdx_source_deps(d):
diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
index 61d7ba45e3..beeafc2bb7 100644
--- a/meta/lib/oe/spdx30_tasks.py
+++ b/meta/lib/oe/spdx30_tasks.py
@@ -156,6 +156,11 @@  def add_package_files(
         bb.note(f"Skip {topdir}")
         return spdx_files
 
+    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
+    if check_compiled_sources:
+        compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
+        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
+
     for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
         dirs[:] = [d for d in dirs if d not in ignore_dirs]
         if subdir == str(topdir):
@@ -171,6 +176,11 @@  def add_package_files(
             filename = str(filepath.relative_to(topdir))
             file_purposes = get_purposes(filepath)
 
+            # Check if file is compiled
+            if check_compiled_sources:
+                if not oe.spdx_common.is_compiled_source(filename, compiled_sources, types):
+                    continue
+
             spdx_file = objset.new_file(
                 get_spdxid(file_counter),
                 filename,
diff --git a/meta/lib/oe/spdx_common.py b/meta/lib/oe/spdx_common.py
index 4caefc7673..daf43bce56 100644
--- a/meta/lib/oe/spdx_common.py
+++ b/meta/lib/oe/spdx_common.py
@@ -242,3 +242,44 @@  def fetch_data_to_uri(fd, name):
         uri = uri + "@" + fd.revision
 
     return uri
+
+def is_compiled_source (filename, compiled_sources, types):
+    """
+    Check if the file is a compiled file
+    """
+    import os
+    # If we don't have compiled source, we assume all are compiled.
+    if len(compiled_sources) == 0:
+        return True
+    # We remove the top directory, to match the format in compiled sources
+    relative = filename[filename.find("/")+1:]
+    basename = os.path.basename(filename)
+    # We return always true if the file type is not in the list of compiled files
+    if basename[basename.find("."):] not in types:
+        return True
+    # Check that the file is in the list
+    return relative in compiled_sources
+
+def get_compiled_sources(d):
+    """
+    Get list of compiled sources from debug information and normalize the paths
+    """
+    import itertools
+    source_info = oe.package.read_debugsources_file(d)
+    if not source_info:
+        bb.debug(1, "Do not have debugsources.list. Skipping")
+        return [], []
+
+    # Sources are not split now in SPDX, so we aggregate them
+    sources = list(set(itertools.chain.from_iterable(source_info.values())))
+    # Check extensions of files
+    types = []
+    for src in sources:
+        basename = os.path.basename(src)
+        # We check that the basename has an extension
+        if basename.find(".") > 0:
+            ext = basename[basename.find("."):]
+            if ext not in types and len(ext)>0:
+                types.append(ext)
+    bb.debug(1, f"Num of sources: {len(sources)} and types: {len(types)} {str(types)}")
+    return sources, types