diff mbox series

[v6,1/3] package: export debugsources in PKGDESTWORK as json

Message ID 20250604112133.2581063-2-daniel.turull@ericsson.com
State New
Headers show
Series Check compiled files to filter kernel CVEs | expand

Commit Message

Daniel Turull June 4, 2025, 11:21 a.m. UTC
From: Daniel Turull <daniel.turull@ericsson.com>

The source information used during packaging can be used by other tasks to
have more detailed information on the files used during the compilation and
improve SPDX accuracy.

Source files used during compilation are stored as zstd-compressed JSON in
pkgdata/debugsources/$PN-debugsources.json.zstd
Format:
{ binary1: [src1, src2, ...], binary2: [src1, src2, ...] }

I checked the sstate size, and it increases slightly when using core-image-full-cmdline:
Before: 2454884 B
After: 2456860 B (+1976 B or 0,08%)

CC: Richard Purdie <richard.purdie@linuxfoundation.org>
Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
---
 meta/lib/oe/package.py | 46 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

Comments

Richard Purdie June 5, 2025, 10:27 a.m. UTC | #1
On Wed, 2025-06-04 at 13:21 +0200, daniel.turull@ericsson.com wrote:
> From: Daniel Turull <daniel.turull@ericsson.com>
> 
> The source information used during packaging can be use from other tasks to
> have more detailed information on the files used during the compilation and
> improve SPDX accuracy.
> 
> Source files used during compilation are store as compressed zstd json in
> pkgdata/debugsources/$PN-debugsources.json.zstd
> Format:
> { binary1: [src1, src2, ...], binary2: [src1, src2, ...] }
> 
> I checked the sstate size, and it slightly increase using core-image-full-cmdline:
> Before: 2454884 B
> After: 2456860 B (+1976 B or 0,08%)
> 
> CC: Richard Purdie <richard.purdie@linuxfoundation.org>
> Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
> ---
>  meta/lib/oe/package.py | 46 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 46 insertions(+)
> 
> diff --git a/meta/lib/oe/package.py b/meta/lib/oe/package.py
> index 0bcc04ea54..c5acb7d205 100644
> --- a/meta/lib/oe/package.py
> +++ b/meta/lib/oe/package.py
> @@ -1049,6 +1049,49 @@ def copydebugsources(debugsrcdir, sources, d):
>              if os.path.exists(p) and not os.listdir(p):
>                  os.rmdir(p)
>  
> +def save_debugsources_info(debugsrcdir, sources_raw, d):
> +    import json
> +    import bb.compress.zstd
> +    if debugsrcdir and sources_raw:
> +        debugsources_file = d.expand("${PKGDESTWORK}/debugsources/${PN}-debugsources.json.zstd")
> +        debugsources_dir = os.path.dirname(debugsources_file)
> +        if not os.path.isdir(debugsources_dir):
> +            bb.utils.mkdirhier(debugsources_dir)
> +        bb.utils.remove(debugsources_file)
> +
> +        workdir = d.getVar("WORKDIR")
> +        sdir = d.getVar("S")
> +        pn = d.getVar('PN')
> +        pv = d.getVar('PV')
> +        sources_dict = {}
> +        for file, src_files in sources_raw:
> +            file_clean = file.replace(f"{workdir}/package/","")
> +            sources_clean = [
> +                src.replace(f"/usr/src/debug/{pn}/", "")
> +                   .replace(f"{sdir}/", "")
> +                   .replace(f"/usr/src/kernel/", "")
> +                   .replace(f"/usr/src/{pn}/", "")
> +                   .replace(f"{pn}-{pv}/", "")
> +                   .replace(f"{pv}/", "")
> +                for src in src_files
> +                if not any(keyword in src for keyword in ("<internal>", "<built-in>")) and not src.endswith("/")
> +            ]
> +            sources_dict[file_clean] = sorted(sources_clean)
> +        num_threads = int(d.getVar("BB_NUMBER_THREADS"))
> +        with bb.compress.zstd.open(debugsources_file, "wt", encoding="utf-8", num_threads=num_threads) as f:
> +            json.dump(sources_dict, f, sort_keys=True)
> +
> +def read_debugsources_file(d):
> +    import json
> +    import bb.compress.zstd
> +    try:
> +        fn = d.expand("${PKGDESTWORK}/debugsources/${PN}-debugsources.json.zstd")
> +        num_threads = int(d.getVar("BB_NUMBER_THREADS"))
> +        with bb.compress.zstd.open(fn, "rt", encoding="utf-8", num_threads=num_threads) as f:
> +            return json.load(f)
> +    except FileNotFoundError:
> +        bb.debug(1, f"File not found: {fn}")
> +        return None
>  
>  def process_split_and_strip_files(d):
>      cpath = oe.cachedpath.CachedPath()
> @@ -1280,6 +1323,9 @@ def process_split_and_strip_files(d):
>          # Process the dv["srcdir"] if requested...
>          # This copies and places the referenced sources for later debugging...
>          copydebugsources(dv["srcdir"], sources, d)
> +
> +        # Save source info to be accessible to other tasks
> +        save_debugsources_info(dv["srcdir"], results, d)
>      #
>      # End of debug splitting
>      #

This looks good, but as I was about to merge it, I realised there is one
issue. bitbake.conf has these entries:

meta/conf/bitbake.conf:oe.packagedata.emit_pkgdata[vardepsexclude] = "BB_NUMBER_THREADS"
meta/conf/bitbake.conf:oe.packagedata.read_subpkgdata_extended[vardepsexclude] = "BB_NUMBER_THREADS"

and I think you need to replicate these for your new functions. The
reason is to stop sstate being dependent on the number of threads
used.

We have had work in progress to move these definitions alongside the
function declarations but I'm not sure we've solved that yet, so adding
to bitbake.conf is the easiest thing to do for now.

Cheers,

Richard
Daniel Turull June 5, 2025, 11:01 a.m. UTC | #2
Thanks, I’ll do that and resubmit with the changes. Should I resubmit the whole series or only this patch?
Cheers,
Daniel
Richard Purdie June 5, 2025, 11:13 a.m. UTC | #3
On Thu, 2025-06-05 at 11:01 +0000, Daniel Turull wrote:
> Thanks, I’ll do that and resubmit with the changes. Should I resubmit
> the whole series or only this patch?

Just this one is fine. I'm trying to get this sorted, then we can focus
on the others. Joshua is the person I'll look to on the SPDX piece,
then the wider community on the script.

Cheers,

Richard
diff mbox series

Patch

diff --git a/meta/lib/oe/package.py b/meta/lib/oe/package.py
index 0bcc04ea54..c5acb7d205 100644
--- a/meta/lib/oe/package.py
+++ b/meta/lib/oe/package.py
@@ -1049,6 +1049,49 @@  def copydebugsources(debugsrcdir, sources, d):
             if os.path.exists(p) and not os.listdir(p):
                 os.rmdir(p)
 
+def save_debugsources_info(debugsrcdir, sources_raw, d):
+    import json
+    import bb.compress.zstd
+    if debugsrcdir and sources_raw:
+        debugsources_file = d.expand("${PKGDESTWORK}/debugsources/${PN}-debugsources.json.zstd")
+        debugsources_dir = os.path.dirname(debugsources_file)
+        if not os.path.isdir(debugsources_dir):
+            bb.utils.mkdirhier(debugsources_dir)
+        bb.utils.remove(debugsources_file)
+
+        workdir = d.getVar("WORKDIR")
+        sdir = d.getVar("S")
+        pn = d.getVar('PN')
+        pv = d.getVar('PV')
+        sources_dict = {}
+        for file, src_files in sources_raw:
+            file_clean = file.replace(f"{workdir}/package/","")
+            sources_clean = [
+                src.replace(f"/usr/src/debug/{pn}/", "")
+                   .replace(f"{sdir}/", "")
+                   .replace(f"/usr/src/kernel/", "")
+                   .replace(f"/usr/src/{pn}/", "")
+                   .replace(f"{pn}-{pv}/", "")
+                   .replace(f"{pv}/", "")
+                for src in src_files
+                if not any(keyword in src for keyword in ("<internal>", "<built-in>")) and not src.endswith("/")
+            ]
+            sources_dict[file_clean] = sorted(sources_clean)
+        num_threads = int(d.getVar("BB_NUMBER_THREADS"))
+        with bb.compress.zstd.open(debugsources_file, "wt", encoding="utf-8", num_threads=num_threads) as f:
+            json.dump(sources_dict, f, sort_keys=True)
+
+def read_debugsources_file(d):
+    import json
+    import bb.compress.zstd
+    try:
+        fn = d.expand("${PKGDESTWORK}/debugsources/${PN}-debugsources.json.zstd")
+        num_threads = int(d.getVar("BB_NUMBER_THREADS"))
+        with bb.compress.zstd.open(fn, "rt", encoding="utf-8", num_threads=num_threads) as f:
+            return json.load(f)
+    except FileNotFoundError:
+        bb.debug(1, f"File not found: {fn}")
+        return None
 
 def process_split_and_strip_files(d):
     cpath = oe.cachedpath.CachedPath()
@@ -1280,6 +1323,9 @@  def process_split_and_strip_files(d):
         # Process the dv["srcdir"] if requested...
         # This copies and places the referenced sources for later debugging...
         copydebugsources(dv["srcdir"], sources, d)
+
+        # Save source info to be accessible to other tasks
+        save_debugsources_info(dv["srcdir"], results, d)
     #
     # End of debug splitting
     #