diff mbox series

[v7] package: export debugsources in PKGDESTWORK as json

Message ID 20250605120306.3692449-1-daniel.turull@ericsson.com
State Accepted, archived
Commit c507dcb8a8780a42bfe68b1ebaff0909b4236e6b
Headers show
Series [v7] package: export debugsources in PKGDESTWORK as json | expand

Commit Message

Daniel Turull June 5, 2025, 12:03 p.m. UTC
From: Daniel Turull <daniel.turull@ericsson.com>

The source information used during packaging can be used by other tasks to
obtain more detailed information on the files used during compilation and
to improve SPDX accuracy.

Source files used during compilation are stored as zstd-compressed JSON in
pkgdata/debugsources/$PN-debugsources.json.zstd
Format:
{ binary1: [src1, src2, ...], binary2: [src1, src2, ...] }

I checked the sstate size, and it increases slightly when building core-image-full-cmdline:
Before: 2454884 B
After: 2456860 B (+1976 B or 0,08%)

CC: Richard Purdie <richard.purdie@linuxfoundation.org>
Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
---
 meta/conf/bitbake.conf |  2 ++
 meta/lib/oe/package.py | 46 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

Comments

Ross Burton June 5, 2025, 1:06 p.m. UTC | #1
On 5 Jun 2025, at 13:03, Daniel Turull via lists.openembedded.org <daniel.turull=ericsson.com@lists.openembedded.org> wrote:
> 
> I checked the sstate size, and it slightly increase using core-image-full-cmdline:
> Before: 2454884 B
> After: 2456860 B (+1976 B or 0,08%)

2000 _bytes_ for adding all of the source information to the core-image-full-cmdline sstate?  I know zstd is good but that seems a little low?

Ross
Daniel Turull June 5, 2025, 1:11 p.m. UTC | #2
Hi, I have just noticed that I used the wrong unit when reading the output of du -s.
It reports kilobytes, so the increase is 1976 KB, not 1976 bytes.

Best regards,
Daniel

> -----Original Message-----
> From: Ross Burton <Ross.Burton@arm.com>
> Sent: Thursday, 5 June 2025 15:06
> To: Daniel Turull <daniel.turull@ericsson.com>
> Cc: openembedded-core@lists.openembedded.org; Richard Purdie
> <richard.purdie@linuxfoundation.org>
> Subject: Re: [OE-core] [PATCH v7] package: export debugsources in
> PKGDESTWORK as json
> 
> On 5 Jun 2025, at 13:03, Daniel Turull via lists.openembedded.org
> <daniel.turull=ericsson.com@lists.openembedded.org> wrote:
> >
> > I checked the sstate size, and it slightly increase using core-image-full-cmdline:
> > Before: 2454884 B
> > After: 2456860 B (+1976 B or 0,08%)
> 
> 2000 _bytes_ for adding all of the source information to the core-image-full-
> cmdline sstate?  I know zstd is good but that seems a little low?
> 
> Ross
Ross Burton June 5, 2025, 1:58 p.m. UTC | #3
Hi Daniel,

> On 5 Jun 2025, at 14:11, Daniel Turull <daniel.turull@ericsson.com> wrote:
> Hi, now I noticed that I put the wrong unit when reading du -s.
> It is Kbytes. So the increase is 1976 KB, not 1976 Bytes

That makes a lot more sense and is an acceptable cost for that useful data.

Thanks,
Ross
Daniel Turull June 9, 2025, 8:17 a.m. UTC | #4
> -----Original Message-----
> From: Daniel Turull <daniel.turull@ericsson.com>
> Sent: Thursday, 5 June 2025 14:03
> To: openembedded-core@lists.openembedded.org
> Cc: Daniel Turull <daniel.turull@ericsson.com>; Richard Purdie
> <richard.purdie@linuxfoundation.org>
> Subject: [PATCH v7] package: export debugsources in PKGDESTWORK as json
> 
> From: Daniel Turull <daniel.turull@ericsson.com>
> 
> The source information used during packaging can be use from other tasks to
> have more detailed information on the files used during the compilation and
> improve SPDX accuracy.
> 
> Source files used during compilation are store as compressed zstd json in
> pkgdata/debugsources/$PN-debugsources.json.zstd
> Format:
> { binary1: [src1, src2, ...], binary2: [src1, src2, ...] }
> 
> I checked the sstate size, and it slightly increase using core-image-full-cmdline:
> Before: 2454884 B
> After: 2456860 B (+1976 B or 0,08%)
> 
> CC: Richard Purdie <richard.purdie@linuxfoundation.org>
> Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
> ---
>  meta/conf/bitbake.conf |  2 ++
>  meta/lib/oe/package.py | 46
> ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 48 insertions(+)
> 
> diff --git a/meta/conf/bitbake.conf b/meta/conf/bitbake.conf index
> 13d55923b7..8c0c71d76d 100644
> --- a/meta/conf/bitbake.conf
> +++ b/meta/conf/bitbake.conf
> @@ -991,5 +991,7 @@ oe.sstatesig.find_sstate_manifest[vardepsexclude] =
> "BBEXTENDCURR BBEXTENDVARIAN
> oe.utils.get_multilib_datastore[vardepsexclude] =
> "DEFAULTTUNE_MULTILIB_ORIGINAL OVERRIDES"
>  oe.path.format_display[vardepsexclude] = "TOPDIR"
>  oe.utils.get_bb_number_threads[vardepsexclude] =
> "BB_NUMBER_THREADS"
> +oe.package.save_debugsources_info[vardepsexclude] =
> "BB_NUMBER_THREADS"
> +oe.package.read_debugsources_file[vardepsexclude] =
> "BB_NUMBER_THREADS"
>  oe.packagedata.emit_pkgdata[vardepsexclude] = "BB_NUMBER_THREADS"
>  oe.packagedata.read_subpkgdata_extended[vardepsexclude] =
> "BB_NUMBER_THREADS"
> diff --git a/meta/lib/oe/package.py b/meta/lib/oe/package.py index
> 0bcc04ea54..c5acb7d205 100644
> --- a/meta/lib/oe/package.py
> +++ b/meta/lib/oe/package.py
> @@ -1049,6 +1049,49 @@ def copydebugsources(debugsrcdir, sources, d):
>              if os.path.exists(p) and not os.listdir(p):
>                  os.rmdir(p)
> 
> +def save_debugsources_info(debugsrcdir, sources_raw, d):
> +    import json
> +    import bb.compress.zstd
> +    if debugsrcdir and sources_raw:
> +        debugsources_file = d.expand("${PKGDESTWORK}/debugsources/${PN}-
> debugsources.json.zstd")
> +        debugsources_dir = os.path.dirname(debugsources_file)
> +        if not os.path.isdir(debugsources_dir):
> +            bb.utils.mkdirhier(debugsources_dir)
> +        bb.utils.remove(debugsources_file)
> +
> +        workdir = d.getVar("WORKDIR")
> +        sdir = d.getVar("S")
> +        pn = d.getVar('PN')
> +        pv = d.getVar('PV')
> +        sources_dict = {}
> +        for file, src_files in sources_raw:
> +            file_clean = file.replace(f"{workdir}/package/","")
> +            sources_clean = [
> +                src.replace(f"/usr/src/debug/{pn}/", "")
> +                   .replace(f"{sdir}/", "")
> +                   .replace(f"/usr/src/kernel/", "")
> +                   .replace(f"/usr/src/{pn}/", "")
> +                   .replace(f"{pn}-{pv}/", "")
> +                   .replace(f"{pv}/", "")

When looking at Joshua's comments, I realized that I forgot to remove "git". I'll send a new version.

> +                for src in src_files
> +                if not any(keyword in src for keyword in ("<internal>", "<built-in>"))
> and not src.endswith("/")
> +            ]
> +            sources_dict[file_clean] = sorted(sources_clean)
> +        num_threads = int(d.getVar("BB_NUMBER_THREADS"))
> +        with bb.compress.zstd.open(debugsources_file, "wt", encoding="utf-8",
> num_threads=num_threads) as f:
> +            json.dump(sources_dict, f, sort_keys=True)
> +
> +def read_debugsources_file(d):
> +    import json
> +    import bb.compress.zstd
> +    try:
> +        fn = d.expand("${PKGDESTWORK}/debugsources/${PN}-
> debugsources.json.zstd")
> +        num_threads = int(d.getVar("BB_NUMBER_THREADS"))
> +        with bb.compress.zstd.open(fn, "rt", encoding="utf-8",
> num_threads=num_threads) as f:
> +            return json.load(f)
> +    except FileNotFoundError:
> +        bb.debug(1, f"File not found: {fn}")
> +        return None
> 
>  def process_split_and_strip_files(d):
>      cpath = oe.cachedpath.CachedPath()
> @@ -1280,6 +1323,9 @@ def process_split_and_strip_files(d):
>          # Process the dv["srcdir"] if requested...
>          # This copies and places the referenced sources for later debugging...
>          copydebugsources(dv["srcdir"], sources, d)
> +
> +        # Save source info to be accessible to other tasks
> +        save_debugsources_info(dv["srcdir"], results, d)
>      #
>      # End of debug splitting
>      #
Richard Purdie June 9, 2025, 8:56 a.m. UTC | #5
On Mon, 2025-06-09 at 08:17 +0000, Daniel Turull wrote:
> 
> 
> > -----Original Message-----
> > From: Daniel Turull <daniel.turull@ericsson.com>
> > Sent: Thursday, 5 June 2025 14:03
> > To: openembedded-core@lists.openembedded.org
> > Cc: Daniel Turull <daniel.turull@ericsson.com>; Richard Purdie
> > <richard.purdie@linuxfoundation.org>
> > Subject: [PATCH v7] package: export debugsources in PKGDESTWORK as json
> > 
> > From: Daniel Turull <daniel.turull@ericsson.com>
> > 
> > The source information used during packaging can be use from other tasks to
> > have more detailed information on the files used during the compilation and
> > improve SPDX accuracy.
> > 
> > Source files used during compilation are store as compressed zstd json in
> > pkgdata/debugsources/$PN-debugsources.json.zstd
> > Format:
> > { binary1: [src1, src2, ...], binary2: [src1, src2, ...] }
> > 
> > I checked the sstate size, and it slightly increase using core-image-full-cmdline:
> > Before: 2454884 B
> > After: 2456860 B (+1976 B or 0,08%)
> > 
> > CC: Richard Purdie <richard.purdie@linuxfoundation.org>
> > Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
> > ---
> >  meta/conf/bitbake.conf |  2 ++
> >  meta/lib/oe/package.py | 46
> > ++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 48 insertions(+)
> > 
> > diff --git a/meta/conf/bitbake.conf b/meta/conf/bitbake.conf index
> > 13d55923b7..8c0c71d76d 100644
> > --- a/meta/conf/bitbake.conf
> > +++ b/meta/conf/bitbake.conf
> > @@ -991,5 +991,7 @@ oe.sstatesig.find_sstate_manifest[vardepsexclude] =
> > "BBEXTENDCURR BBEXTENDVARIAN
> > oe.utils.get_multilib_datastore[vardepsexclude] =
> > "DEFAULTTUNE_MULTILIB_ORIGINAL OVERRIDES"
> >  oe.path.format_display[vardepsexclude] = "TOPDIR"
> >  oe.utils.get_bb_number_threads[vardepsexclude] =
> > "BB_NUMBER_THREADS"
> > +oe.package.save_debugsources_info[vardepsexclude] =
> > "BB_NUMBER_THREADS"
> > +oe.package.read_debugsources_file[vardepsexclude] =
> > "BB_NUMBER_THREADS"
> >  oe.packagedata.emit_pkgdata[vardepsexclude] = "BB_NUMBER_THREADS"
> >  oe.packagedata.read_subpkgdata_extended[vardepsexclude] =
> > "BB_NUMBER_THREADS"
> > diff --git a/meta/lib/oe/package.py b/meta/lib/oe/package.py index
> > 0bcc04ea54..c5acb7d205 100644
> > --- a/meta/lib/oe/package.py
> > +++ b/meta/lib/oe/package.py
> > @@ -1049,6 +1049,49 @@ def copydebugsources(debugsrcdir, sources, d):
> >              if os.path.exists(p) and not os.listdir(p):
> >                  os.rmdir(p)
> > 
> > +def save_debugsources_info(debugsrcdir, sources_raw, d):
> > +    import json
> > +    import bb.compress.zstd
> > +    if debugsrcdir and sources_raw:
> > +        debugsources_file = d.expand("${PKGDESTWORK}/debugsources/${PN}-
> > debugsources.json.zstd")
> > +        debugsources_dir = os.path.dirname(debugsources_file)
> > +        if not os.path.isdir(debugsources_dir):
> > +            bb.utils.mkdirhier(debugsources_dir)
> > +        bb.utils.remove(debugsources_file)
> > +
> > +        workdir = d.getVar("WORKDIR")
> > +        sdir = d.getVar("S")
> > +        pn = d.getVar('PN')
> > +        pv = d.getVar('PV')
> > +        sources_dict = {}
> > +        for file, src_files in sources_raw:
> > +            file_clean = file.replace(f"{workdir}/package/","")
> > +            sources_clean = [
> > +                src.replace(f"/usr/src/debug/{pn}/", "")
> > +                   .replace(f"{sdir}/", "")
> > +                   .replace(f"/usr/src/kernel/", "")
> > +                   .replace(f"/usr/src/{pn}/", "")
> > +                   .replace(f"{pn}-{pv}/", "")
> > +                   .replace(f"{pv}/", "")
> 
> When looking at Joshua's comments, I relalized that I missed to remove "git". I'll send a new version.

I'm a bit apprehensive about this "magic" set of replacements. In
general, sdir (S) is going to represent what you're after. B (the build
directory) will also show up here and can probably be mapped the same
way as S for the purposes of what you're doing. Files in B will almost
always be generated.

It should also be possible to read /usr/src/xxx from a variable rather
than hardcoding it. I can understand why the kernel ends up special but I'm
not sure why there would be two other usr/src options.

I'm therefore left wondering where the other pieces come from and whether we
can remove more of the magic values here.

Cheers,

Richard
Daniel Turull June 9, 2025, 9:01 a.m. UTC | #6
> -----Original Message-----
> From: Richard Purdie <richard.purdie@linuxfoundation.org>
> Sent: Monday, 9 June 2025 10:57
> To: Daniel Turull <daniel.turull@ericsson.com>; openembedded-
> core@lists.openembedded.org
> Subject: Re: [PATCH v7] package: export debugsources in PKGDESTWORK as
> json
> 
> On Mon, 2025-06-09 at 08:17 +0000, Daniel Turull wrote:
> >
> >
> > > -----Original Message-----
> > > From: Daniel Turull <daniel.turull@ericsson.com>
> > > Sent: Thursday, 5 June 2025 14:03
> > > To: openembedded-core@lists.openembedded.org
> > > Cc: Daniel Turull <daniel.turull@ericsson.com>; Richard Purdie
> > > <richard.purdie@linuxfoundation.org>
> > > Subject: [PATCH v7] package: export debugsources in PKGDESTWORK as
> > > json
> > >
> > > From: Daniel Turull <daniel.turull@ericsson.com>
> > >
> > > The source information used during packaging can be use from other
> > > tasks to have more detailed information on the files used during the
> > > compilation and improve SPDX accuracy.
> > >
> > > Source files used during compilation are store as compressed zstd
> > > json in pkgdata/debugsources/$PN-debugsources.json.zstd
> > > Format:
> > > { binary1: [src1, src2, ...], binary2: [src1, src2, ...] }
> > >
> > > I checked the sstate size, and it slightly increase using core-image-full-
> cmdline:
> > > Before: 2454884 B
> > > After: 2456860 B (+1976 B or 0,08%)
> > >
> > > CC: Richard Purdie <richard.purdie@linuxfoundation.org>
> > > Signed-off-by: Daniel Turull <daniel.turull@ericsson.com>
> > > ---
> > >  meta/conf/bitbake.conf |  2 ++
> > >  meta/lib/oe/package.py | 46
> > > ++++++++++++++++++++++++++++++++++++++++++
> > >  2 files changed, 48 insertions(+)
> > >
> > > diff --git a/meta/conf/bitbake.conf b/meta/conf/bitbake.conf index
> > > 13d55923b7..8c0c71d76d 100644
> > > --- a/meta/conf/bitbake.conf
> > > +++ b/meta/conf/bitbake.conf
> > > @@ -991,5 +991,7 @@
> > > oe.sstatesig.find_sstate_manifest[vardepsexclude] = "BBEXTENDCURR
> > > BBEXTENDVARIAN oe.utils.get_multilib_datastore[vardepsexclude] =
> > > "DEFAULTTUNE_MULTILIB_ORIGINAL OVERRIDES"
> > >  oe.path.format_display[vardepsexclude] = "TOPDIR"
> > >  oe.utils.get_bb_number_threads[vardepsexclude] =
> > > "BB_NUMBER_THREADS"
> > > +oe.package.save_debugsources_info[vardepsexclude] =
> > > "BB_NUMBER_THREADS"
> > > +oe.package.read_debugsources_file[vardepsexclude] =
> > > "BB_NUMBER_THREADS"
> > >  oe.packagedata.emit_pkgdata[vardepsexclude] =
> "BB_NUMBER_THREADS"
> > >  oe.packagedata.read_subpkgdata_extended[vardepsexclude] =
> > > "BB_NUMBER_THREADS"
> > > diff --git a/meta/lib/oe/package.py b/meta/lib/oe/package.py index
> > > 0bcc04ea54..c5acb7d205 100644
> > > --- a/meta/lib/oe/package.py
> > > +++ b/meta/lib/oe/package.py
> > > @@ -1049,6 +1049,49 @@ def copydebugsources(debugsrcdir, sources,
> d):
> > >              if os.path.exists(p) and not os.listdir(p):
> > >                  os.rmdir(p)
> > >
> > > +def save_debugsources_info(debugsrcdir, sources_raw, d):
> > > +    import json
> > > +    import bb.compress.zstd
> > > +    if debugsrcdir and sources_raw:
> > > +        debugsources_file =
> > > +d.expand("${PKGDESTWORK}/debugsources/${PN}-
> > > debugsources.json.zstd")
> > > +        debugsources_dir = os.path.dirname(debugsources_file)
> > > +        if not os.path.isdir(debugsources_dir):
> > > +            bb.utils.mkdirhier(debugsources_dir)
> > > +        bb.utils.remove(debugsources_file)
> > > +
> > > +        workdir = d.getVar("WORKDIR")
> > > +        sdir = d.getVar("S")
> > > +        pn = d.getVar('PN')
> > > +        pv = d.getVar('PV')
> > > +        sources_dict = {}
> > > +        for file, src_files in sources_raw:
> > > +            file_clean = file.replace(f"{workdir}/package/","")
> > > +            sources_clean = [
> > > +                src.replace(f"/usr/src/debug/{pn}/", "")
> > > +                   .replace(f"{sdir}/", "")
> > > +                   .replace(f"/usr/src/kernel/", "")
> > > +                   .replace(f"/usr/src/{pn}/", "")
> > > +                   .replace(f"{pn}-{pv}/", "")
> > > +                   .replace(f"{pv}/", "")
> >
> > When looking at Joshua's comments, I relalized that I missed to remove
> "git". I'll send a new version.
> 
> I'm a bit apprehensive about this "magic" set of replacements. In general, sdir
> (S) is going to represent what you're after. B (the build
> directory) will also show up here and can probably be mapped the same way
> as S for the purposes of what you're doing. Files in B will almost always be
> generated.
> 
> /usr/src/xxx should also be possible to be read from a variable rather than
> hardcoded. I can understand why the kernel ends up special but I'm not sure
> why there would be two other usr/src options.
> 
> I'm therefore left wondering why the other pieces come from and if we can
> remove more of the magic values here.

I'll look at the "magic" and see if we can do the same with variables and remove it also from the SPDX patch.

I'll try to send a new version later this week, without "magic".

Thanks for taking the time to look at it. I feel the series has been improved quite a lot since my first version.

Daniel

> Cheers,
> 
> Richard
diff mbox series

Patch

diff --git a/meta/conf/bitbake.conf b/meta/conf/bitbake.conf
index 13d55923b7..8c0c71d76d 100644
--- a/meta/conf/bitbake.conf
+++ b/meta/conf/bitbake.conf
@@ -991,5 +991,7 @@  oe.sstatesig.find_sstate_manifest[vardepsexclude] = "BBEXTENDCURR BBEXTENDVARIAN
 oe.utils.get_multilib_datastore[vardepsexclude] = "DEFAULTTUNE_MULTILIB_ORIGINAL OVERRIDES"
 oe.path.format_display[vardepsexclude] = "TOPDIR"
 oe.utils.get_bb_number_threads[vardepsexclude] = "BB_NUMBER_THREADS"
+oe.package.save_debugsources_info[vardepsexclude] = "BB_NUMBER_THREADS"
+oe.package.read_debugsources_file[vardepsexclude] = "BB_NUMBER_THREADS"
 oe.packagedata.emit_pkgdata[vardepsexclude] = "BB_NUMBER_THREADS"
 oe.packagedata.read_subpkgdata_extended[vardepsexclude] = "BB_NUMBER_THREADS"
diff --git a/meta/lib/oe/package.py b/meta/lib/oe/package.py
index 0bcc04ea54..c5acb7d205 100644
--- a/meta/lib/oe/package.py
+++ b/meta/lib/oe/package.py
@@ -1049,6 +1049,49 @@  def copydebugsources(debugsrcdir, sources, d):
             if os.path.exists(p) and not os.listdir(p):
                 os.rmdir(p)
 
+def save_debugsources_info(debugsrcdir, sources_raw, d):
+    import json
+    import bb.compress.zstd
+    if debugsrcdir and sources_raw:
+        debugsources_file = d.expand("${PKGDESTWORK}/debugsources/${PN}-debugsources.json.zstd")
+        debugsources_dir = os.path.dirname(debugsources_file)
+        if not os.path.isdir(debugsources_dir):
+            bb.utils.mkdirhier(debugsources_dir)
+        bb.utils.remove(debugsources_file)
+
+        workdir = d.getVar("WORKDIR")
+        sdir = d.getVar("S")
+        pn = d.getVar('PN')
+        pv = d.getVar('PV')
+        sources_dict = {}
+        for file, src_files in sources_raw:
+            file_clean = file.replace(f"{workdir}/package/","")
+            sources_clean = [
+                src.replace(f"/usr/src/debug/{pn}/", "")
+                   .replace(f"{sdir}/", "")
+                   .replace(f"/usr/src/kernel/", "")
+                   .replace(f"/usr/src/{pn}/", "")
+                   .replace(f"{pn}-{pv}/", "")
+                   .replace(f"{pv}/", "")
+                for src in src_files
+                if not any(keyword in src for keyword in ("<internal>", "<built-in>")) and not src.endswith("/")
+            ]
+            sources_dict[file_clean] = sorted(sources_clean)
+        num_threads = int(d.getVar("BB_NUMBER_THREADS"))
+        with bb.compress.zstd.open(debugsources_file, "wt", encoding="utf-8", num_threads=num_threads) as f:
+            json.dump(sources_dict, f, sort_keys=True)
+
+def read_debugsources_file(d):
+    import json
+    import bb.compress.zstd
+    try:
+        fn = d.expand("${PKGDESTWORK}/debugsources/${PN}-debugsources.json.zstd")
+        num_threads = int(d.getVar("BB_NUMBER_THREADS"))
+        with bb.compress.zstd.open(fn, "rt", encoding="utf-8", num_threads=num_threads) as f:
+            return json.load(f)
+    except FileNotFoundError:
+        bb.debug(1, f"File not found: {fn}")
+        return None
 
 def process_split_and_strip_files(d):
     cpath = oe.cachedpath.CachedPath()
@@ -1280,6 +1323,9 @@  def process_split_and_strip_files(d):
         # Process the dv["srcdir"] if requested...
         # This copies and places the referenced sources for later debugging...
         copydebugsources(dv["srcdir"], sources, d)
+
+        # Save source info to be accessible to other tasks
+        save_debugsources_info(dv["srcdir"], results, d)
     #
     # End of debug splitting
     #