| Message ID | a3e600b6d0d61b567c5ba296f66947a9728f296a.1781766061.git.anders.heimer@est.tech |
|---|---|
| State | Under Review |
| Headers | show |
| Series | package: replace copydebugsources shell pipelines | expand |
On Thu, 2026-06-18 at 09:19 +0200, Anders Heimer wrote: > Replace the sort/grep/sed parts of copydebugsources with Python > filtering on the NUL-separated source list. Keep cpio since it is > faster than doing it in python. > > Use an explicit prefix + "/" match before stripping the prefix so source > selection is limited to files in the mapped debug source directory. > > Replace the find/sed symlink fixup pipeline with os.walk() plus cpio, > use an argv-list mv for the externalsrc relocation, and pass the > empty-directory find command as an argv list. > > The first cpio copy pass continues to ignore failures as before since > some inputs are expected to fail. The symlink fixup copy still reports > cpio failures. > > Signed-off-by: Anders Heimer <anders.heimer@est.tech> > --- > meta/lib/oe/package.py | 70 +++++++++++++++++++++++++++++------------- > 1 file changed, 48 insertions(+), 22 deletions(-) > > diff --git a/meta/lib/oe/package.py b/meta/lib/oe/package.py > index c375acc124..c4ad364b64 100644 > --- a/meta/lib/oe/package.py > +++ b/meta/lib/oe/package.py > @@ -1017,26 +1017,48 @@ def copydebugsources(debugsrcdir, sources, d): > bb.utils.mkdirhier(basepath) > cpath.updatecache(basepath) > > - for pmap in prefixmap: > - # Ignore files from the recipe sysroots (target and native) > - cmd = "LC_ALL=C ; sort -z -u '%s' | egrep -v -z '((<internal>|<built-in>)$|/.*recipe-sysroot.*/)' | " % sourcefile > - # We need to ignore files that are not actually ours > - # we do this by only paying attention to items from this package > - cmd += "fgrep -zw '%s' | " % prefixmap[pmap] > - # Remove prefix in the source paths > - cmd += "sed 's#%s/##g' | " % (prefixmap[pmap]) > - cmd += "(cd '%s' ; cpio -pd0mlLu --no-preserve-owner '%s%s' 2>/dev/null)" % (pmap, dvar, prefixmap[pmap]) > + # Ignore files from the recipe sysroots (target and native), and > + # compiler internal entries. 
> + with open(sourcefile, "rb") as f: > + sourcepaths = sorted({path for path in f.read().split(b"\0") > + if path > + and not path.endswith((b"<internal>", b"<built-in>")) > + and b"recipe-sysroot" not in os.path.dirname(path)}) > + > + for pmap, prefix in prefixmap.items(): > + dstroot = dvar + prefix > + prefix_slash = os.fsencode(prefix) + b"/" > + relpaths = [path[len(prefix_slash):] > + for path in sourcepaths > + if path.startswith(prefix_slash)] > + > + if relpaths: > + subprocess.run(["cpio", "-pd0mlLu", "--no-preserve-owner", dstroot], > + input=b"\0".join(relpaths) + b"\0", > + cwd=pmap, stdout=subprocess.DEVNULL, > + stderr=subprocess.DEVNULL, check=False)

I feel like there are a couple of cases of doing a bit too much at once here. It took me a few looks at it to realise that a set comprehension is used to populate sourcepaths, so it's removing duplicate entries. I would find the following more readable:

    with open(sourcefile, "rb") as f:
        rawpaths = f.read().split(b"\0")

    # Ignore files from the recipe sysroots (target and native), and
    # compiler internal entries. Use a set comprehension to prevent
    # duplicate entries.
    sourcepaths = {path for path in rawpaths
                   if path
                   and not path.endswith((b"<internal>", b"<built-in>"))
                   and b"recipe-sysroot" not in os.path.dirname(path)}

    for pmap, prefix in prefixmap.items():
        dstroot = dvar + prefix
        prefix_slash = os.fsencode(prefix) + b"/"
        relpaths = [path.removeprefix(prefix_slash)
                    for path in sourcepaths
                    if path.startswith(prefix_slash)]

        if relpaths:
            subprocess.run(["cpio", "-pd0mlLu", "--no-preserve-owner", dstroot],
                           input=b"\0".join(sorted(relpaths)) + b"\0",
                           cwd=pmap, stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL, check=False)

Our minimum Python version is 3.9 so we can use removeprefix(). And placing the sorted() call in the cpio invocation matches what we do again later in the function.
I assume that path.removeprefix(prefix_slash) can never result in a zero-length string because we'll never see an entry in sourcefile exactly matching the prefix with the trailing '/'. The rest of the patch LGTM! Best regards,
diff --git a/meta/lib/oe/package.py b/meta/lib/oe/package.py index c375acc124..c4ad364b64 100644 --- a/meta/lib/oe/package.py +++ b/meta/lib/oe/package.py @@ -1017,26 +1017,48 @@ def copydebugsources(debugsrcdir, sources, d): bb.utils.mkdirhier(basepath) cpath.updatecache(basepath) - for pmap in prefixmap: - # Ignore files from the recipe sysroots (target and native) - cmd = "LC_ALL=C ; sort -z -u '%s' | egrep -v -z '((<internal>|<built-in>)$|/.*recipe-sysroot.*/)' | " % sourcefile - # We need to ignore files that are not actually ours - # we do this by only paying attention to items from this package - cmd += "fgrep -zw '%s' | " % prefixmap[pmap] - # Remove prefix in the source paths - cmd += "sed 's#%s/##g' | " % (prefixmap[pmap]) - cmd += "(cd '%s' ; cpio -pd0mlLu --no-preserve-owner '%s%s' 2>/dev/null)" % (pmap, dvar, prefixmap[pmap]) + # Ignore files from the recipe sysroots (target and native), and + # compiler internal entries. + with open(sourcefile, "rb") as f: + sourcepaths = sorted({path for path in f.read().split(b"\0") + if path + and not path.endswith((b"<internal>", b"<built-in>")) + and b"recipe-sysroot" not in os.path.dirname(path)}) + + for pmap, prefix in prefixmap.items(): + dstroot = dvar + prefix + prefix_slash = os.fsencode(prefix) + b"/" + relpaths = [path[len(prefix_slash):] + for path in sourcepaths + if path.startswith(prefix_slash)] + + if relpaths: + subprocess.run(["cpio", "-pd0mlLu", "--no-preserve-owner", dstroot], + input=b"\0".join(relpaths) + b"\0", + cwd=pmap, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, check=False) - try: - subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) - except subprocess.CalledProcessError: - # Can "fail" if internal headers/transient sources are attempted - pass # cpio seems to have a bug with -lL together and symbolic links are just copied, not dereferenced. # Work around this by manually finding and copying any symbolic links that made it through. 
- cmd = "find %s%s -type l -print0 -delete | sed s#%s%s/##g | (cd '%s' ; cpio -pd0mL --no-preserve-owner '%s%s')" % \ - (dvar, prefixmap[pmap], dvar, prefixmap[pmap], pmap, dvar, prefixmap[pmap]) - subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) + symlinks = [] + for root, dirs, files in os.walk(dstroot, topdown=True, followlinks=False): + for name in dirs[:]: + path = os.path.join(root, name) + if os.path.islink(path): + symlinks.append(os.fsencode(os.path.relpath(path, dstroot))) + os.unlink(path) + dirs.remove(name) + + for name in files: + path = os.path.join(root, name) + if os.path.islink(path): + symlinks.append(os.fsencode(os.path.relpath(path, dstroot))) + os.unlink(path) + + if symlinks: + subprocess.check_output(["cpio", "-pd0mL", "--no-preserve-owner", dstroot], + input=b"\0".join(sorted(symlinks)) + b"\0", + cwd=pmap, stderr=subprocess.STDOUT) # debugsources.list may be polluted from the host if we used externalsrc, # cpio uses copy-pass and may have just created a directory structure @@ -1046,13 +1068,17 @@ def copydebugsources(debugsrcdir, sources, d): # Same check as above for externalsrc if workdir not in sdir: - if os.path.exists(dvar + debugsrcdir + sdir): - cmd = "mv %s%s%s/* %s%s" % (dvar, debugsrcdir, sdir, dvar,debugsrcdir) - subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) + srcdir = dvar + debugsrcdir + sdir + dstdir = dvar + debugsrcdir + if os.path.exists(srcdir): + entries = sorted(glob.glob(os.path.join(glob.escape(srcdir), "*"))) + if entries: + subprocess.check_output(["mv", "--"] + entries + [dstdir], + stderr=subprocess.STDOUT) # The copy by cpio may have resulted in some empty directories! 
Remove these - cmd = "find %s%s -empty -type d -delete" % (dvar, debugsrcdir) - subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) + cmd = ["find", dvar + debugsrcdir, "-empty", "-type", "d", "-delete"] + subprocess.check_output(cmd, stderr=subprocess.STDOUT) # Also remove debugsrcdir if its empty for p in nosuchdir[::-1]:
Replace the sort/grep/sed parts of copydebugsources with Python filtering on the NUL-separated source list. Keep cpio since it is faster than doing it in Python.

Use an explicit prefix + "/" match before stripping the prefix so source selection is limited to files in the mapped debug source directory.

Replace the find/sed symlink fixup pipeline with os.walk() plus cpio, use an argv-list mv for the externalsrc relocation, and pass the empty-directory find command as an argv list.

The first cpio copy pass continues to ignore failures as before since some inputs are expected to fail. The symlink fixup copy still reports cpio failures.

Signed-off-by: Anders Heimer <anders.heimer@est.tech>
---
 meta/lib/oe/package.py | 70 +++++++++++++++++++++++++++++-------------
 1 file changed, 48 insertions(+), 22 deletions(-)