diff mbox series

sstate: Don't abort if installing a sstate package fails

Message ID 20220809083527.1221635-1-alban.bedel@aerq.com
State New
Headers show
Series sstate: Don't abort if installing a sstate package fails | expand

Commit Message

Bedel, Alban Aug. 9, 2022, 8:35 a.m. UTC
Currently if the installation of a sstate package fails the whole
build is aborted although bitbake could just fallback on running the
real task.

To fix this issue we replace the call to bb.fatal() in
sstate_setscene() with a simple sys.exit() to pass the error to bitbake
without aborting the build. The error message that was passed to
bb.fatal() is just removed as all error path already contains a
warning or note.

Then to further improve the build resilience we add a try block around
the sstate package unpacking. If it fails, for example because the
package was corrupted, we now print a warning and optionally remove
the broken package to allow it to be rebuilt. The cleaning of broken
packages can be disabled be setting the SSTATE_CLEAN_BROKEN_PACKAGES
variable to 0.

Signed-off-by: Alban Bedel <alban.bedel@aerq.com>
---
 meta/classes/sstate.bbclass | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

Comments

Alexander Kanavin Aug. 9, 2022, 8:39 a.m. UTC | #1
Apologies, but no. This fixes the symptoms, not the problem, which is
that your infrastructure is malfunctioning. Why does it supply broken
artifacts?

Alex

On Tue, 9 Aug 2022 at 10:36, Alban Bedel via lists.openembedded.org
<alban.bedel=aerq.com@lists.openembedded.org> wrote:
>
> Currently if the installation of a sstate package fails the whole
> build is aborted although bitbake could just fallback on running the
> real task.
>
> To fix this issue we replace the call to bb.fatal() in
> sstate_setscene() with a simple sys.exit() to pass the error to bitbake
> without aborting the build. The error message that was passed to
> bb.fatal() is just removed as all error path already contains a
> warning or note.
>
> Then to further improve the build resilience we add a try block around
> the sstate package unpacking. If it fails, for example because the
> package was corrupted, we now print a warning and optionally remove
> the broken package to allow it to be rebuilt. The cleaning of broken
> packages can be disabled be setting the SSTATE_CLEAN_BROKEN_PACKAGES
> variable to 0.
>
> Signed-off-by: Alban Bedel <alban.bedel@aerq.com>
> ---
>  meta/classes/sstate.bbclass | 20 ++++++++++++++++----
>  1 file changed, 16 insertions(+), 4 deletions(-)
>
> diff --git a/meta/classes/sstate.bbclass b/meta/classes/sstate.bbclass
> index de6e7fa9608a..069901483bb2 100644
> --- a/meta/classes/sstate.bbclass
> +++ b/meta/classes/sstate.bbclass
> @@ -108,6 +108,9 @@ SSTATE_SIG_PASSPHRASE ?= ""
>  # Whether to verify the GnUPG signatures when extracting sstate archives
>  SSTATE_VERIFY_SIG ?= "0"
>
> +# Whether to remove broken sstate packages
> +SSTATE_CLEAN_BROKEN_PACKAGES ?= "1"
> +
>  SSTATE_HASHEQUIV_METHOD ?= "oe.sstatesig.OEOuthashBasic"
>  SSTATE_HASHEQUIV_METHOD[doc] = "The fully-qualified function used to calculate \
>      the output hash for a task, which in turn is used to determine equivalency. \
> @@ -375,9 +378,18 @@ def sstate_installpkg(ss, d):
>      sstateinst = d.getVar("SSTATE_INSTDIR")
>      d.setVar('SSTATE_FIXMEDIR', ss['fixmedir'])
>
> -    for f in (d.getVar('SSTATEPREINSTFUNCS') or '').split() + ['sstate_unpack_package']:
> -        # All hooks should run in the SSTATE_INSTDIR
> -        bb.build.exec_func(f, d, (sstateinst,))
> +    try:
> +        for f in (d.getVar('SSTATEPREINSTFUNCS') or '').split() + ['sstate_unpack_package']:
> +            # All hooks should run in the SSTATE_INSTDIR
> +            bb.build.exec_func(f, d, (sstateinst,))
> +    except bb.process.ExecutionError:
> +        bb.warn("Failed to install sstate package %s, skipping acceleration..." % sstatepkg)
> +        oe.path.remove(sstateinst)
> +        if bb.utils.to_boolean(d.getVar("SSTATE_CLEAN_BROKEN_PACKAGES"), True):
> +            bb.warn("Removing broken sstate package %s" % sstatepkg)
> +            oe.path.remove(sstatepkg)
> +            oe.path.remove(sstatepkg + '.*')
> +        return False
>
>      return sstate_installpkgdir(ss, d)
>
> @@ -763,7 +775,7 @@ def sstate_setscene(d):
>      shared_state = sstate_state_fromvars(d)
>      accelerate = sstate_installpkg(shared_state, d)
>      if not accelerate:
> -        bb.fatal("No suitable staging package found")
> +        sys.exit(1)
>
>  python sstate_task_prefunc () {
>      shared_state = sstate_state_fromvars(d)
> --
> 2.34.1
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#169140): https://lists.openembedded.org/g/openembedded-core/message/169140
> Mute This Topic: https://lists.openembedded.org/mt/92911047/1686489
> Group Owner: openembedded-core+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub [alex.kanavin@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
Richard Purdie Aug. 9, 2022, 8:59 a.m. UTC | #2
On Tue, 2022-08-09 at 10:35 +0200, Alban Bedel via
lists.openembedded.org wrote:
> Currently if the installation of a sstate package fails the whole
> build is aborted although bitbake could just fallback on running the
> real task.
> 
> To fix this issue we replace the call to bb.fatal() in
> sstate_setscene() with a simple sys.exit() to pass the error to bitbake
> without aborting the build. The error message that was passed to
> bb.fatal() is just removed as all error path already contains a
> warning or note.
> 
> Then to further improve the build resilience we add a try block around
> the sstate package unpacking. If it fails, for example because the
> package was corrupted, we now print a warning and optionally remove
> the broken package to allow it to be rebuilt. The cleaning of broken
> packages can be disabled be setting the SSTATE_CLEAN_BROKEN_PACKAGES
> variable to 0.
> 
> Signed-off-by: Alban Bedel <alban.bedel@aerq.com>
> ---
>  meta/classes/sstate.bbclass | 20 ++++++++++++++++----
>  1 file changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/meta/classes/sstate.bbclass b/meta/classes/sstate.bbclass
> index de6e7fa9608a..069901483bb2 100644
> --- a/meta/classes/sstate.bbclass
> +++ b/meta/classes/sstate.bbclass
> @@ -108,6 +108,9 @@ SSTATE_SIG_PASSPHRASE ?= ""
>  # Whether to verify the GnUPG signatures when extracting sstate archives
>  SSTATE_VERIFY_SIG ?= "0"
>  
> +# Whether to remove broken sstate packages
> +SSTATE_CLEAN_BROKEN_PACKAGES ?= "1"
> +
>  SSTATE_HASHEQUIV_METHOD ?= "oe.sstatesig.OEOuthashBasic"
>  SSTATE_HASHEQUIV_METHOD[doc] = "The fully-qualified function used to calculate \
>      the output hash for a task, which in turn is used to determine equivalency. \
> @@ -375,9 +378,18 @@ def sstate_installpkg(ss, d):
>      sstateinst = d.getVar("SSTATE_INSTDIR")
>      d.setVar('SSTATE_FIXMEDIR', ss['fixmedir'])
>  
> -    for f in (d.getVar('SSTATEPREINSTFUNCS') or '').split() + ['sstate_unpack_package']:
> -        # All hooks should run in the SSTATE_INSTDIR
> -        bb.build.exec_func(f, d, (sstateinst,))
> +    try:
> +        for f in (d.getVar('SSTATEPREINSTFUNCS') or '').split() + ['sstate_unpack_package']:
> +            # All hooks should run in the SSTATE_INSTDIR
> +            bb.build.exec_func(f, d, (sstateinst,))
> +    except bb.process.ExecutionError:
> +        bb.warn("Failed to install sstate package %s, skipping acceleration..." % sstatepkg)
> +        oe.path.remove(sstateinst)
> +        if bb.utils.to_boolean(d.getVar("SSTATE_CLEAN_BROKEN_PACKAGES"), True):
> +            bb.warn("Removing broken sstate package %s" % sstatepkg)
> +            oe.path.remove(sstatepkg)
> +            oe.path.remove(sstatepkg + '.*')
> +        return False
>  
>      return sstate_installpkgdir(ss, d)
>  
> @@ -763,7 +775,7 @@ def sstate_setscene(d):
>      shared_state = sstate_state_fromvars(d)
>      accelerate = sstate_installpkg(shared_state, d)
>      if not accelerate:
> -        bb.fatal("No suitable staging package found")
> +        sys.exit(1)
>  
>  python sstate_task_prefunc () {
>      shared_state = sstate_state_fromvars(d)

How are you corrupting sstate files? They're moved into place
atomically at the end of processing so this should be really hard to
do.

As such, I'm reluctant to take any patch like this as it simply
shouldn't happen, and if it does, there are bigger issues which need to
be fixed.

Cheers,

Richard
Jose Quaresma Aug. 9, 2022, 9:14 a.m. UTC | #3
Hi Alban,

Alban Bedel via lists.openembedded.org <alban.bedel=
aerq.com@lists.openembedded.org> escreveu no dia terça, 9/08/2022 à(s)
09:36:

> Currently if the installation of a sstate package fails the whole
> build is aborted although bitbake could just fallback on running the
> real task.
>
> To fix this issue we replace the call to bb.fatal() in
> sstate_setscene() with a simple sys.exit() to pass the error to bitbake
> without aborting the build. The error message that was passed to
> bb.fatal() is just removed as all error path already contains a
> warning or note.
>
> Then to further improve the build resilience we add a try block around
> the sstate package unpacking. If it fails, for example because the
> package was corrupted, we now print a warning and optionally remove
> the broken package to allow it to be rebuilt. The cleaning of broken
> packages can be disabled be setting the SSTATE_CLEAN_BROKEN_PACKAGES
> variable to 0.
>

What do you mean by package was corrupted?
If it fails when extracting the archive is because your filesystem is
corrupted and what needs to be fixed is the file system.

Jose


>
> Signed-off-by: Alban Bedel <alban.bedel@aerq.com>
> ---
>  meta/classes/sstate.bbclass | 20 ++++++++++++++++----
>  1 file changed, 16 insertions(+), 4 deletions(-)
>
> diff --git a/meta/classes/sstate.bbclass b/meta/classes/sstate.bbclass
> index de6e7fa9608a..069901483bb2 100644
> --- a/meta/classes/sstate.bbclass
> +++ b/meta/classes/sstate.bbclass
> @@ -108,6 +108,9 @@ SSTATE_SIG_PASSPHRASE ?= ""
>  # Whether to verify the GnUPG signatures when extracting sstate archives
>  SSTATE_VERIFY_SIG ?= "0"
>
> +# Whether to remove broken sstate packages
> +SSTATE_CLEAN_BROKEN_PACKAGES ?= "1"
> +
>  SSTATE_HASHEQUIV_METHOD ?= "oe.sstatesig.OEOuthashBasic"
>  SSTATE_HASHEQUIV_METHOD[doc] = "The fully-qualified function used to
> calculate \
>      the output hash for a task, which in turn is used to determine
> equivalency. \
> @@ -375,9 +378,18 @@ def sstate_installpkg(ss, d):
>      sstateinst = d.getVar("SSTATE_INSTDIR")
>      d.setVar('SSTATE_FIXMEDIR', ss['fixmedir'])
>
> -    for f in (d.getVar('SSTATEPREINSTFUNCS') or '').split() +
> ['sstate_unpack_package']:
> -        # All hooks should run in the SSTATE_INSTDIR
> -        bb.build.exec_func(f, d, (sstateinst,))
> +    try:
> +        for f in (d.getVar('SSTATEPREINSTFUNCS') or '').split() +
> ['sstate_unpack_package']:
> +            # All hooks should run in the SSTATE_INSTDIR
> +            bb.build.exec_func(f, d, (sstateinst,))
> +    except bb.process.ExecutionError:
> +        bb.warn("Failed to install sstate package %s, skipping
> acceleration..." % sstatepkg)
> +        oe.path.remove(sstateinst)
> +        if bb.utils.to_boolean(d.getVar("SSTATE_CLEAN_BROKEN_PACKAGES"),
> True):
> +            bb.warn("Removing broken sstate package %s" % sstatepkg)
> +            oe.path.remove(sstatepkg)
> +            oe.path.remove(sstatepkg + '.*')
> +        return False
>
>      return sstate_installpkgdir(ss, d)
>
> @@ -763,7 +775,7 @@ def sstate_setscene(d):
>      shared_state = sstate_state_fromvars(d)
>      accelerate = sstate_installpkg(shared_state, d)
>      if not accelerate:
> -        bb.fatal("No suitable staging package found")
> +        sys.exit(1)
>
>  python sstate_task_prefunc () {
>      shared_state = sstate_state_fromvars(d)
> --
> 2.34.1
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#169140):
> https://lists.openembedded.org/g/openembedded-core/message/169140
> Mute This Topic: https://lists.openembedded.org/mt/92911047/5052612
> Group Owner: openembedded-core+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub [
> quaresma.jose@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
>
Bedel, Alban Aug. 9, 2022, 1:48 p.m. UTC | #4
On Tue, 2022-08-09 at 09:59 +0100, Richard Purdie wrote:
> On Tue, 2022-08-09 at 10:35 +0200, Alban Bedel via
> lists.openembedded.org wrote:
> > Currently if the installation of a sstate package fails the whole
> > build is aborted although bitbake could just fallback on running the
> > real task.
> > 
> > To fix this issue we replace the call to bb.fatal() in
> > sstate_setscene() with a simple sys.exit() to pass the error to bitbake
> > without aborting the build. The error message that was passed to
> > bb.fatal() is just removed as all error path already contains a
> > warning or note.
> > 
> > Then to further improve the build resilience we add a try block around
> > the sstate package unpacking. If it fails, for example because the
> > package was corrupted, we now print a warning and optionally remove
> > the broken package to allow it to be rebuilt. The cleaning of broken
> > packages can be disabled be setting the SSTATE_CLEAN_BROKEN_PACKAGES
> > variable to 0.
> > 
> > Signed-off-by: Alban Bedel <alban.bedel@aerq.com>
> > ---
> >  meta/classes/sstate.bbclass | 20 ++++++++++++++++----
> >  1 file changed, 16 insertions(+), 4 deletions(-)
> > 
> > diff --git a/meta/classes/sstate.bbclass b/meta/classes/sstate.bbclass
> > index de6e7fa9608a..069901483bb2 100644
> > --- a/meta/classes/sstate.bbclass
> > +++ b/meta/classes/sstate.bbclass
> > @@ -108,6 +108,9 @@ SSTATE_SIG_PASSPHRASE ?= ""
> >  # Whether to verify the GnUPG signatures when extracting sstate archives
> >  SSTATE_VERIFY_SIG ?= "0"
> >  
> > +# Whether to remove broken sstate packages
> > +SSTATE_CLEAN_BROKEN_PACKAGES ?= "1"
> > +
> >  SSTATE_HASHEQUIV_METHOD ?= "oe.sstatesig.OEOuthashBasic"
> >  SSTATE_HASHEQUIV_METHOD[doc] = "The fully-qualified function used to
> > calculate \
> >      the output hash for a task, which in turn is used to determine
> > equivalency. \
> > @@ -375,9 +378,18 @@ def sstate_installpkg(ss, d):
> >      sstateinst = d.getVar("SSTATE_INSTDIR")
> >      d.setVar('SSTATE_FIXMEDIR', ss['fixmedir'])
> >  
> > -    for f in (d.getVar('SSTATEPREINSTFUNCS') or '').split() +
> > ['sstate_unpack_package']:
> > -        # All hooks should run in the SSTATE_INSTDIR
> > -        bb.build.exec_func(f, d, (sstateinst,))
> > +    try:
> > +        for f in (d.getVar('SSTATEPREINSTFUNCS') or '').split() +
> > ['sstate_unpack_package']:
> > +            # All hooks should run in the SSTATE_INSTDIR
> > +            bb.build.exec_func(f, d, (sstateinst,))
> > +    except bb.process.ExecutionError:
> > +        bb.warn("Failed to install sstate package %s, skipping
> > acceleration..." % sstatepkg)
> > +        oe.path.remove(sstateinst)
> > +        if bb.utils.to_boolean(d.getVar("SSTATE_CLEAN_BROKEN_PACKAGES"),
> > True):
> > +            bb.warn("Removing broken sstate package %s" % sstatepkg)
> > +            oe.path.remove(sstatepkg)
> > +            oe.path.remove(sstatepkg + '.*')
> > +        return False
> >  
> >      return sstate_installpkgdir(ss, d)
> >  
> > @@ -763,7 +775,7 @@ def sstate_setscene(d):
> >      shared_state = sstate_state_fromvars(d)
> >      accelerate = sstate_installpkg(shared_state, d)
> >      if not accelerate:
> > -        bb.fatal("No suitable staging package found")
> > +        sys.exit(1)
> >  
> >  python sstate_task_prefunc () {
> >      shared_state = sstate_state_fromvars(d)
> 
> How are you corrupting sstate files? They're moved into place
> atomically at the end of processing so this should be really hard to
> do.

We are still hunting for this problem, but as it happen on a shared build
server with several users it is not trivial to find why this is happening.

> As such, I'm reluctant to take any patch like this as it simply
> shouldn't happen, and if it does, there are bigger issues which need to
> be fixed.

I kind of expected this answer as I'm fully aware that this is only a band
aid. On the other hand sstates are supposed to be an optional optimisation, so
I find it very annoying to have them kill the whole build when somethings goes
wrong. Even more so when it turned out that bitbake fully support falling back
to the real task and it is just OE that enforce a failure instead of
outputting a warning and finishing the build.

Would you consider a patch where this fallback would be optional and turned
off by default?

Alban
Alexander Kanavin Aug. 9, 2022, 4:22 p.m. UTC | #5
On Tue, 9 Aug 2022 at 15:49, Alban Bedel via lists.openembedded.org
<alban.bedel=aerq.com@lists.openembedded.org> wrote:
> > How are you corrupting sstate files? They're moved into place
> > atomically at the end of processing so this should be really hard to
> > do.
>
> We are still hunting for this problem, but as it happen on a shared build
> server with several users it is not trivial to find why this is happening.

Is somebody running 'bitbake -c cleansstate' or otherwise removing
items from sstate while someone else is running a regular build? Don't
do this, bitbake can't handle it.

Alex
diff mbox series

Patch

diff --git a/meta/classes/sstate.bbclass b/meta/classes/sstate.bbclass
index de6e7fa9608a..069901483bb2 100644
--- a/meta/classes/sstate.bbclass
+++ b/meta/classes/sstate.bbclass
@@ -108,6 +108,9 @@  SSTATE_SIG_PASSPHRASE ?= ""
 # Whether to verify the GnUPG signatures when extracting sstate archives
 SSTATE_VERIFY_SIG ?= "0"
 
+# Whether to remove broken sstate packages
+SSTATE_CLEAN_BROKEN_PACKAGES ?= "1"
+
 SSTATE_HASHEQUIV_METHOD ?= "oe.sstatesig.OEOuthashBasic"
 SSTATE_HASHEQUIV_METHOD[doc] = "The fully-qualified function used to calculate \
     the output hash for a task, which in turn is used to determine equivalency. \
@@ -375,9 +378,18 @@  def sstate_installpkg(ss, d):
     sstateinst = d.getVar("SSTATE_INSTDIR")
     d.setVar('SSTATE_FIXMEDIR', ss['fixmedir'])
 
-    for f in (d.getVar('SSTATEPREINSTFUNCS') or '').split() + ['sstate_unpack_package']:
-        # All hooks should run in the SSTATE_INSTDIR
-        bb.build.exec_func(f, d, (sstateinst,))
+    try:
+        for f in (d.getVar('SSTATEPREINSTFUNCS') or '').split() + ['sstate_unpack_package']:
+            # All hooks should run in the SSTATE_INSTDIR
+            bb.build.exec_func(f, d, (sstateinst,))
+    except bb.process.ExecutionError:
+        bb.warn("Failed to install sstate package %s, skipping acceleration..." % sstatepkg)
+        oe.path.remove(sstateinst)
+        if bb.utils.to_boolean(d.getVar("SSTATE_CLEAN_BROKEN_PACKAGES"), True):
+            bb.warn("Removing broken sstate package %s" % sstatepkg)
+            oe.path.remove(sstatepkg)
+            oe.path.remove(sstatepkg + '.*')
+        return False
 
     return sstate_installpkgdir(ss, d)
 
@@ -763,7 +775,7 @@  def sstate_setscene(d):
     shared_state = sstate_state_fromvars(d)
     accelerate = sstate_installpkg(shared_state, d)
     if not accelerate:
-        bb.fatal("No suitable staging package found")
+        sys.exit(1)
 
 python sstate_task_prefunc () {
     shared_state = sstate_state_fromvars(d)