diff mbox series

[yocto-autobuilder-helper,v2,01/10] scripts/utils: fix stale extraction dir when tarball is updated

Message ID ee99d892018112109ab1b0e6f3ec8899918bdc52.1780354513.git.tim.orling@konsulko.com
State New
Headers show
Series [yocto-autobuilder-helper,v2,01/10] scripts/utils: fix stale extraction dir when tarball is updated | expand

Commit Message

Tim Orling June 1, 2026, 11:18 p.m. UTC
From: Tim Orling <tim.orling@konsulko.com>

Previously the entire download/cache-validation block in
setup_tools_tarball() was guarded by `if not os.path.exists(btdir)`.
Once the extraction directory existed from a prior build, every
subsequent call was a no-op: the cached tarball was never re-validated,
so a freshly-published SDK (e.g. vcontainer-tarball-latest) was silently
ignored and the stale btdir kept being used.

Fix by moving the lock/download block outside the btdir existence guard
so cache validation always runs. Track tarball_updated to know when the
cached download was actually replaced, then remove the stale btdir when
True so the fresh tarball is re-extracted.

Also add an mtime-based staleness check for local-path (cp) sources so
that a newer source file automatically invalidates the cached copy.

AI-Generated: Claude Cowork Sonnet 4.6
Signed-off-by: Tim Orling <tim.orling@konsulko.com>
---
 scripts/utils.py | 64 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 23 deletions(-)

Comments

Paul Barker June 2, 2026, 10:22 a.m. UTC | #1
On Mon, 2026-06-01 at 16:18 -0700, Tim Orling via lists.yoctoproject.org
wrote:
> From: Tim Orling <tim.orling@konsulko.com>
> 
> Previously the entire download/cache-validation block in
> setup_tools_tarball() was guarded by `if not os.path.exists(btdir)`.
> Once the extraction directory existed from a prior build, every
> subsequent call was a no-op: the cached tarball was never re-validated,
> so a freshly-published SDK (e.g. vcontainer-tarball-latest) was silently
> ignored and the stale btdir kept being used.
> 
> Fix by moving the lock/download block outside the btdir existence guard
> so cache validation always runs. Track tarball_updated to know when the
> cached download was actually replaced, then remove the stale btdir when
> True so the fresh tarball is re-extracted.
> 
> Also add an mtime-based staleness check for local-path (cp) sources so
> that a newer source file automatically invalidates the cached copy.
> 
> AI-Generated: Claude Cowork Sonnet 4.6
> Signed-off-by: Tim Orling <tim.orling@konsulko.com>
> ---
>  scripts/utils.py | 64 +++++++++++++++++++++++++++++++-----------------
>  1 file changed, 41 insertions(+), 23 deletions(-)
> 
> diff --git a/scripts/utils.py b/scripts/utils.py
> index 88842f0..87acad6 100644
> --- a/scripts/utils.py
> +++ b/scripts/utils.py
> @@ -481,31 +481,49 @@ def setup_tools_tarball(ourconfig, btdir, bttarball, name="buildtools"):
>          if ";" in bttarball:
>              bttarball, sha256 = bttarball.split(";")
>          btdir = os.path.abspath(btdir)
> +        btdlpath = getconfig("BASE_SHAREDDIR", ourconfig) + "/cluster-downloads-cache/" + os.path.basename(bttarball)
> +        btlock = btdlpath + ".lock"
> +        if not os.path.exists(os.path.dirname(btdlpath)):
> +            os.makedirs(os.path.dirname(btdlpath), exist_ok=True)
> +        # Always run the cache-validation / download step so that a freshly
> +        # published tarball is detected even when btdir already exists from a
> +        # previous build.  tarball_updated is set to True whenever the cached
> +        # download is replaced, which triggers removal of the stale btdir.
> +        tarball_updated = False
> +        while True:
> +            try:
> +                with open(btlock, 'a+') as lf:
> +                    fileno = lf.fileno()
> +                    fcntl.flock(fileno, fcntl.LOCK_EX)
> +                    if sha256 and os.path.exists(btdlpath):
> +                        dl_sha256 = sha256_file(btdlpath)
> +                        if dl_sha256 != sha256:
> +                            os.unlink(btdlpath)
> +                    elif bttarball.startswith("/") and os.path.exists(btdlpath):
> +                        # For local-path sources (e.g. vcontainer-tarball-latest)
> +                        # invalidate the cached copy when the source is newer so
> +                        # that a freshly-published tarball is always picked up.
> +                        if os.path.getmtime(bttarball) > os.path.getmtime(btdlpath):
> +                            os.unlink(btdlpath)
> +                        os.unlink(btdlpath)
> +                    if not os.path.exists(btdlpath):
> +                        if bttarball.startswith("/"):
> +                            subprocess.check_call(["cp", bttarball, btdlpath])
> +                        else:
> +                            subprocess.check_call(["wget", "-O", btdlpath, bttarball])
> +                        os.chmod(btdlpath, 0o775)
> +                        tarball_updated = True
> +                break
> +            except OSError:
> +                # We raced with someone else, try again
> +                pass

Hi Tim,

This loop allows us to handle random OSError conditions caused by a
race, but it turns a deterministic OSError into an infinite loop. The
logic already exists, you're just moving it around, but now I have seen
it I cannot unsee it!

Should we add a timeout while we are here?

> +        # If the underlying tarball changed, remove any stale extraction
> +        # directory so it is re-extracted below.
> +        if tarball_updated and os.path.exists(btdir):
> +            print("Removing stale %s extraction at %s" % (name, btdir))
> +            subprocess.check_call(["rm", "-rf", btdir])
>          if not os.path.exists(btdir):
> -            btdlpath = getconfig("BASE_SHAREDDIR", ourconfig) + "/cluster-downloads-cache/" + os.path.basename(bttarball)
>              print("Extracting %s %s" % (name, bttarball))
> -            btlock = btdlpath + ".lock"
> -            if not os.path.exists(os.path.dirname(btdlpath)):
> -                os.makedirs(os.path.dirname(btdlpath), exist_ok=True)
> -            while True:
> -                try:
> -                    with open(btlock, 'a+') as lf:
> -                        fileno = lf.fileno()
> -                        fcntl.flock(fileno, fcntl.LOCK_EX)
> -                        if sha256 and os.path.exists(btdlpath):
> -                            dl_sha256 = sha256_file(btdlpath)
> -                            if dl_sha256 != sha256:
> -                                os.unlink(btdlpath)
> -                        if not os.path.exists(btdlpath):
> -                            if bttarball.startswith("/"):
> -                                subprocess.check_call(["cp", bttarball, btdlpath])
> -                            else:
> -                                subprocess.check_call(["wget", "-O", btdlpath, bttarball])
> -                            os.chmod(btdlpath, 0o775)
> -                    break
> -                except OSError:
> -                    # We raced with someone else, try again
> -                    pass
>              subprocess.check_call(["bash", btdlpath, "-d", btdir, "-y"])
>          enable_tools_tarball(btdir, name)

Best regards,
Tim Orling June 6, 2026, 3:15 a.m. UTC | #2
On Tue, Jun 2, 2026 at 3:22 AM Paul Barker via lists.yoctoproject.org <paul=
pbarker.dev@lists.yoctoproject.org> wrote:

> On Mon, 2026-06-01 at 16:18 -0700, Tim Orling via lists.yoctoproject.org
> wrote:
> > From: Tim Orling <tim.orling@konsulko.com>
> >
> > Previously the entire download/cache-validation block in
> > setup_tools_tarball() was guarded by `if not os.path.exists(btdir)`.
> > Once the extraction directory existed from a prior build, every
> > subsequent call was a no-op: the cached tarball was never re-validated,
> > so a freshly-published SDK (e.g. vcontainer-tarball-latest) was silently
> > ignored and the stale btdir kept being used.
> >
> > Fix by moving the lock/download block outside the btdir existence guard
> > so cache validation always runs. Track tarball_updated to know when the
> > cached download was actually replaced, then remove the stale btdir when
> > True so the fresh tarball is re-extracted.
> >
> > Also add an mtime-based staleness check for local-path (cp) sources so
> > that a newer source file automatically invalidates the cached copy.
> >
> > AI-Generated: Claude Cowork Sonnet 4.6
> > Signed-off-by: Tim Orling <tim.orling@konsulko.com>
> > ---
> >  scripts/utils.py | 64 +++++++++++++++++++++++++++++++-----------------
> >  1 file changed, 41 insertions(+), 23 deletions(-)
> >
> > diff --git a/scripts/utils.py b/scripts/utils.py
> > index 88842f0..87acad6 100644
> > --- a/scripts/utils.py
> > +++ b/scripts/utils.py
> > @@ -481,31 +481,49 @@ def setup_tools_tarball(ourconfig, btdir,
> bttarball, name="buildtools"):
> >          if ";" in bttarball:
> >              bttarball, sha256 = bttarball.split(";")
> >          btdir = os.path.abspath(btdir)
> > +        btdlpath = getconfig("BASE_SHAREDDIR", ourconfig) +
> "/cluster-downloads-cache/" + os.path.basename(bttarball)
> > +        btlock = btdlpath + ".lock"
> > +        if not os.path.exists(os.path.dirname(btdlpath)):
> > +            os.makedirs(os.path.dirname(btdlpath), exist_ok=True)
> > +        # Always run the cache-validation / download step so that a
> freshly
> > +        # published tarball is detected even when btdir already exists
> from a
> > +        # previous build.  tarball_updated is set to True whenever the
> cached
> > +        # download is replaced, which triggers removal of the stale
> btdir.
> > +        tarball_updated = False
> > +        while True:
> > +            try:
> > +                with open(btlock, 'a+') as lf:
> > +                    fileno = lf.fileno()
> > +                    fcntl.flock(fileno, fcntl.LOCK_EX)
> > +                    if sha256 and os.path.exists(btdlpath):
> > +                        dl_sha256 = sha256_file(btdlpath)
> > +                        if dl_sha256 != sha256:
> > +                            os.unlink(btdlpath)
> > +                    elif bttarball.startswith("/") and
> os.path.exists(btdlpath):
> > +                        # For local-path sources (e.g.
> vcontainer-tarball-latest)
> > +                        # invalidate the cached copy when the source is
> newer so
> > +                        # that a freshly-published tarball is always
> picked up.
> > +                        if os.path.getmtime(bttarball) >
> os.path.getmtime(btdlpath):
> > +                            os.unlink(btdlpath)
> > +                        os.unlink(btdlpath)
> > +                    if not os.path.exists(btdlpath):
> > +                        if bttarball.startswith("/"):
> > +                            subprocess.check_call(["cp", bttarball,
> btdlpath])
> > +                        else:
> > +                            subprocess.check_call(["wget", "-O",
> btdlpath, bttarball])
> > +                        os.chmod(btdlpath, 0o775)
> > +                        tarball_updated = True
> > +                break
> > +            except OSError:
> > +                # We raced with someone else, try again
> > +                pass
>
> Hi Tim,
>
> This loop allows us to handle random OSError conditions caused by a
> race, but it turns a deterministic OSError into an infinite loop. The
> logic already exists, you're just moving it around, but now I have seen
> it I cannot unsee it!
>
> Should we add a timeout while we are here?
>

This patch was already merged, but the timeout concern was submitted
separately:
https://lore.kernel.org/yocto-patches/20260606011918.61582-1-tim.orling@konsulko.com/


>
> > +        # If the underlying tarball changed, remove any stale extraction
> > +        # directory so it is re-extracted below.
> > +        if tarball_updated and os.path.exists(btdir):
> > +            print("Removing stale %s extraction at %s" % (name, btdir))
> > +            subprocess.check_call(["rm", "-rf", btdir])
> >          if not os.path.exists(btdir):
> > -            btdlpath = getconfig("BASE_SHAREDDIR", ourconfig) +
> "/cluster-downloads-cache/" + os.path.basename(bttarball)
> >              print("Extracting %s %s" % (name, bttarball))
> > -            btlock = btdlpath + ".lock"
> > -            if not os.path.exists(os.path.dirname(btdlpath)):
> > -                os.makedirs(os.path.dirname(btdlpath), exist_ok=True)
> > -            while True:
> > -                try:
> > -                    with open(btlock, 'a+') as lf:
> > -                        fileno = lf.fileno()
> > -                        fcntl.flock(fileno, fcntl.LOCK_EX)
> > -                        if sha256 and os.path.exists(btdlpath):
> > -                            dl_sha256 = sha256_file(btdlpath)
> > -                            if dl_sha256 != sha256:
> > -                                os.unlink(btdlpath)
> > -                        if not os.path.exists(btdlpath):
> > -                            if bttarball.startswith("/"):
> > -                                subprocess.check_call(["cp", bttarball,
> btdlpath])
> > -                            else:
> > -                                subprocess.check_call(["wget", "-O",
> btdlpath, bttarball])
> > -                            os.chmod(btdlpath, 0o775)
> > -                    break
> > -                except OSError:
> > -                    # We raced with someone else, try again
> > -                    pass
> >              subprocess.check_call(["bash", btdlpath, "-d", btdir, "-y"])
> >          enable_tools_tarball(btdir, name)
>
> Best regards,
>
> --
> Paul Barker
>
>
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#4110):
> https://lists.yoctoproject.org/g/yocto-patches/message/4110
> Mute This Topic: https://lists.yoctoproject.org/mt/119603240/924729
> Group Owner: yocto-patches+owner@lists.yoctoproject.org
> Unsubscribe:
> https://lists.yoctoproject.org/g/yocto-patches/leave/13169857/924729/1023951714/xyzzy
> [ticotimo@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
>
>
diff mbox series

Patch

diff --git a/scripts/utils.py b/scripts/utils.py
index 88842f0..87acad6 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -481,31 +481,49 @@  def setup_tools_tarball(ourconfig, btdir, bttarball, name="buildtools"):
         if ";" in bttarball:
             bttarball, sha256 = bttarball.split(";")
         btdir = os.path.abspath(btdir)
+        btdlpath = getconfig("BASE_SHAREDDIR", ourconfig) + "/cluster-downloads-cache/" + os.path.basename(bttarball)
+        btlock = btdlpath + ".lock"
+        if not os.path.exists(os.path.dirname(btdlpath)):
+            os.makedirs(os.path.dirname(btdlpath), exist_ok=True)
+        # Always run the cache-validation / download step so that a freshly
+        # published tarball is detected even when btdir already exists from a
+        # previous build.  tarball_updated is set to True whenever the cached
+        # download is replaced, which triggers removal of the stale btdir.
+        tarball_updated = False
+        while True:
+            try:
+                with open(btlock, 'a+') as lf:
+                    fileno = lf.fileno()
+                    fcntl.flock(fileno, fcntl.LOCK_EX)
+                    if sha256 and os.path.exists(btdlpath):
+                        dl_sha256 = sha256_file(btdlpath)
+                        if dl_sha256 != sha256:
+                            os.unlink(btdlpath)
+                    elif bttarball.startswith("/") and os.path.exists(btdlpath):
+                        # For local-path sources (e.g. vcontainer-tarball-latest)
+                        # invalidate the cached copy when the source is newer so
+                        # that a freshly-published tarball is always picked up.
+                        if os.path.getmtime(bttarball) > os.path.getmtime(btdlpath):
+                            os.unlink(btdlpath)
+                        os.unlink(btdlpath)
+                    if not os.path.exists(btdlpath):
+                        if bttarball.startswith("/"):
+                            subprocess.check_call(["cp", bttarball, btdlpath])
+                        else:
+                            subprocess.check_call(["wget", "-O", btdlpath, bttarball])
+                        os.chmod(btdlpath, 0o775)
+                        tarball_updated = True
+                break
+            except OSError:
+                # We raced with someone else, try again
+                pass
+        # If the underlying tarball changed, remove any stale extraction
+        # directory so it is re-extracted below.
+        if tarball_updated and os.path.exists(btdir):
+            print("Removing stale %s extraction at %s" % (name, btdir))
+            subprocess.check_call(["rm", "-rf", btdir])
         if not os.path.exists(btdir):
-            btdlpath = getconfig("BASE_SHAREDDIR", ourconfig) + "/cluster-downloads-cache/" + os.path.basename(bttarball)
             print("Extracting %s %s" % (name, bttarball))
-            btlock = btdlpath + ".lock"
-            if not os.path.exists(os.path.dirname(btdlpath)):
-                os.makedirs(os.path.dirname(btdlpath), exist_ok=True)
-            while True:
-                try:
-                    with open(btlock, 'a+') as lf:
-                        fileno = lf.fileno()
-                        fcntl.flock(fileno, fcntl.LOCK_EX)
-                        if sha256 and os.path.exists(btdlpath):
-                            dl_sha256 = sha256_file(btdlpath)
-                            if dl_sha256 != sha256:
-                                os.unlink(btdlpath)
-                        if not os.path.exists(btdlpath):
-                            if bttarball.startswith("/"):
-                                subprocess.check_call(["cp", bttarball, btdlpath])
-                            else:
-                                subprocess.check_call(["wget", "-O", btdlpath, bttarball])
-                            os.chmod(btdlpath, 0o775)
-                    break
-                except OSError:
-                    # We raced with someone else, try again
-                    pass
             subprocess.check_call(["bash", btdlpath, "-d", btdir, "-y"])
         enable_tools_tarball(btdir, name)