diff mbox series

[v2,1/2] package: replace copydebugsources shell pipelines

Message ID a3e600b6d0d61b567c5ba296f66947a9728f296a.1781766061.git.anders.heimer@est.tech
State Under Review
Headers show
Series package: replace copydebugsources shell pipelines | expand

Commit Message

Anders Heimer June 18, 2026, 7:19 a.m. UTC
Replace the sort/grep/sed parts of copydebugsources with Python
filtering on the NUL-separated source list. Keep cpio since it is
faster than doing it in python.

Use an explicit prefix + "/" match before stripping the prefix so source
selection is limited to files in the mapped debug source directory.

Replace the find/sed symlink fixup pipeline with os.walk() plus cpio,
use an argv-list mv for the externalsrc relocation, and pass the
empty-directory find command as an argv list.

The first cpio copy pass continues to ignore failures as before since
some inputs are expected to fail. The symlink fixup copy still reports
cpio failures.

Signed-off-by: Anders Heimer <anders.heimer@est.tech>
---
 meta/lib/oe/package.py | 70 +++++++++++++++++++++++++++++-------------
 1 file changed, 48 insertions(+), 22 deletions(-)

Comments

Paul Barker June 18, 2026, 2:02 p.m. UTC | #1
On Thu, 2026-06-18 at 09:19 +0200, Anders Heimer wrote:
> Replace the sort/grep/sed parts of copydebugsources with Python
> filtering on the NUL-separated source list. Keep cpio since it is
> faster than doing it in python.
> 
> Use an explicit prefix + "/" match before stripping the prefix so source
> selection is limited to files in the mapped debug source directory.
> 
> Replace the find/sed symlink fixup pipeline with os.walk() plus cpio,
> use an argv-list mv for the externalsrc relocation, and pass the
> empty-directory find command as an argv list.
> 
> The first cpio copy pass continues to ignore failures as before since
> some inputs are expected to fail. The symlink fixup copy still reports
> cpio failures.
> 
> Signed-off-by: Anders Heimer <anders.heimer@est.tech>
> ---
>  meta/lib/oe/package.py | 70 +++++++++++++++++++++++++++++-------------
>  1 file changed, 48 insertions(+), 22 deletions(-)
> 
> diff --git a/meta/lib/oe/package.py b/meta/lib/oe/package.py
> index c375acc124..c4ad364b64 100644
> --- a/meta/lib/oe/package.py
> +++ b/meta/lib/oe/package.py
> @@ -1017,26 +1017,48 @@ def copydebugsources(debugsrcdir, sources, d):
>          bb.utils.mkdirhier(basepath)
>          cpath.updatecache(basepath)
>  
> -        for pmap in prefixmap:
> -            # Ignore files from the recipe sysroots (target and native)
> -            cmd =  "LC_ALL=C ; sort -z -u '%s' | egrep -v -z '((<internal>|<built-in>)$|/.*recipe-sysroot.*/)' | " % sourcefile
> -            # We need to ignore files that are not actually ours
> -            # we do this by only paying attention to items from this package
> -            cmd += "fgrep -zw '%s' | " % prefixmap[pmap]
> -            # Remove prefix in the source paths
> -            cmd += "sed 's#%s/##g' | " % (prefixmap[pmap])
> -            cmd += "(cd '%s' ; cpio -pd0mlLu --no-preserve-owner '%s%s' 2>/dev/null)" % (pmap, dvar, prefixmap[pmap])
> +        # Ignore files from the recipe sysroots (target and native), and
> +        # compiler internal entries.
> +        with open(sourcefile, "rb") as f:
> +            sourcepaths = sorted({path for path in f.read().split(b"\0")
> +                                  if path
> +                                  and not path.endswith((b"<internal>", b"<built-in>"))
> +                                  and b"recipe-sysroot" not in os.path.dirname(path)})
> +
> +        for pmap, prefix in prefixmap.items():
> +            dstroot = dvar + prefix
> +            prefix_slash = os.fsencode(prefix) + b"/"
> +            relpaths = [path[len(prefix_slash):]
> +                        for path in sourcepaths
> +                        if path.startswith(prefix_slash)]
> +
> +            if relpaths:
> +                subprocess.run(["cpio", "-pd0mlLu", "--no-preserve-owner", dstroot],
> +                               input=b"\0".join(relpaths) + b"\0",
> +                               cwd=pmap, stdout=subprocess.DEVNULL,
> +                               stderr=subprocess.DEVNULL, check=False)

I feel like a couple of statements here are doing a bit too much at
once. It took me a few looks to realise that sourcepaths is populated by
a set comprehension, which is what removes the duplicate entries.

I would find the following more readable:

    with open(sourcefile, "rb") as f:
        rawpaths = f.read().split(b"\0")

    # Ignore files from the recipe sysroots (target and native), and
    # compiler internal entries. Use a set comprehension to prevent
    # duplicate entries.
    sourcepaths = {path for path in rawpaths
                   if path
                   and not path.endswith((b"<internal>", b"<built-in>"))
                   and b"recipe-sysroot" not in os.path.dirname(path)}

    for pmap, prefix in prefixmap.items():
        dstroot = dvar + prefix
        prefix_slash = os.fsencode(prefix) + b"/"
        relpaths = [path.removeprefix(prefix_slash) for path in sourcepaths
                    if path.startswith(prefix_slash)]

        if relpaths:
            subprocess.run(["cpio", "-pd0mlLu", "--no-preserve-owner", dstroot],
                           input=b"\0".join(sorted(relpaths)) + b"\0",
                           cwd=pmap, stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL, check=False)

Our minimum Python version is 3.9, so we can use bytes.removeprefix().
Placing the sorted() call in the cpio invocation also matches what the
symlink fixup copy does later in the function.

I assume that path.removeprefix(prefix_slash) can never produce a
zero-length string, because sourcefile should never contain an entry
that is exactly the prefix followed by a trailing '/'.

The rest of the patch LGTM!

Best regards,
diff mbox series

Patch

diff --git a/meta/lib/oe/package.py b/meta/lib/oe/package.py
index c375acc124..c4ad364b64 100644
--- a/meta/lib/oe/package.py
+++ b/meta/lib/oe/package.py
@@ -1017,26 +1017,48 @@  def copydebugsources(debugsrcdir, sources, d):
         bb.utils.mkdirhier(basepath)
         cpath.updatecache(basepath)
 
-        for pmap in prefixmap:
-            # Ignore files from the recipe sysroots (target and native)
-            cmd =  "LC_ALL=C ; sort -z -u '%s' | egrep -v -z '((<internal>|<built-in>)$|/.*recipe-sysroot.*/)' | " % sourcefile
-            # We need to ignore files that are not actually ours
-            # we do this by only paying attention to items from this package
-            cmd += "fgrep -zw '%s' | " % prefixmap[pmap]
-            # Remove prefix in the source paths
-            cmd += "sed 's#%s/##g' | " % (prefixmap[pmap])
-            cmd += "(cd '%s' ; cpio -pd0mlLu --no-preserve-owner '%s%s' 2>/dev/null)" % (pmap, dvar, prefixmap[pmap])
+        # Ignore files from the recipe sysroots (target and native), and
+        # compiler internal entries.
+        with open(sourcefile, "rb") as f:
+            sourcepaths = sorted({path for path in f.read().split(b"\0")
+                                  if path
+                                  and not path.endswith((b"<internal>", b"<built-in>"))
+                                  and b"recipe-sysroot" not in os.path.dirname(path)})
+
+        for pmap, prefix in prefixmap.items():
+            dstroot = dvar + prefix
+            prefix_slash = os.fsencode(prefix) + b"/"
+            relpaths = [path[len(prefix_slash):]
+                        for path in sourcepaths
+                        if path.startswith(prefix_slash)]
+
+            if relpaths:
+                subprocess.run(["cpio", "-pd0mlLu", "--no-preserve-owner", dstroot],
+                               input=b"\0".join(relpaths) + b"\0",
+                               cwd=pmap, stdout=subprocess.DEVNULL,
+                               stderr=subprocess.DEVNULL, check=False)
 
-            try:
-                subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
-            except subprocess.CalledProcessError:
-                # Can "fail" if internal headers/transient sources are attempted
-                pass
             # cpio seems to have a bug with -lL together and symbolic links are just copied, not dereferenced.
             # Work around this by manually finding and copying any symbolic links that made it through.
-            cmd = "find %s%s -type l -print0 -delete | sed s#%s%s/##g | (cd '%s' ; cpio -pd0mL --no-preserve-owner '%s%s')" % \
-                    (dvar, prefixmap[pmap], dvar, prefixmap[pmap], pmap, dvar, prefixmap[pmap])
-            subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
+            symlinks = []
+            for root, dirs, files in os.walk(dstroot, topdown=True, followlinks=False):
+                for name in dirs[:]:
+                    path = os.path.join(root, name)
+                    if os.path.islink(path):
+                        symlinks.append(os.fsencode(os.path.relpath(path, dstroot)))
+                        os.unlink(path)
+                        dirs.remove(name)
+
+                for name in files:
+                    path = os.path.join(root, name)
+                    if os.path.islink(path):
+                        symlinks.append(os.fsencode(os.path.relpath(path, dstroot)))
+                        os.unlink(path)
+
+            if symlinks:
+                subprocess.check_output(["cpio", "-pd0mL", "--no-preserve-owner", dstroot],
+                                        input=b"\0".join(sorted(symlinks)) + b"\0",
+                                        cwd=pmap, stderr=subprocess.STDOUT)
 
         # debugsources.list may be polluted from the host if we used externalsrc,
         # cpio uses copy-pass and may have just created a directory structure
@@ -1046,13 +1068,17 @@  def copydebugsources(debugsrcdir, sources, d):
 
         # Same check as above for externalsrc
         if workdir not in sdir:
-            if os.path.exists(dvar + debugsrcdir + sdir):
-                cmd = "mv %s%s%s/* %s%s" % (dvar, debugsrcdir, sdir, dvar,debugsrcdir)
-                subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
+            srcdir = dvar + debugsrcdir + sdir
+            dstdir = dvar + debugsrcdir
+            if os.path.exists(srcdir):
+                entries = sorted(glob.glob(os.path.join(glob.escape(srcdir), "*")))
+                if entries:
+                    subprocess.check_output(["mv", "--"] + entries + [dstdir],
+                                            stderr=subprocess.STDOUT)
 
         # The copy by cpio may have resulted in some empty directories!  Remove these
-        cmd = "find %s%s -empty -type d -delete" % (dvar, debugsrcdir)
-        subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
+        cmd = ["find", dvar + debugsrcdir, "-empty", "-type", "d", "-delete"]
+        subprocess.check_output(cmd, stderr=subprocess.STDOUT)
 
         # Also remove debugsrcdir if its empty
         for p in nosuchdir[::-1]: