From patchwork Sat Mar 19 11:46:42 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Reyna, David" X-Patchwork-Id: 5518 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from aws-us-west-2-korg-lkml-1.web.codeaurora.org (localhost.localdomain [127.0.0.1]) by smtp.lore.kernel.org (Postfix) with ESMTP id C7C83C433EF for ; Sat, 19 Mar 2022 11:46:47 +0000 (UTC) Received: from mail1.wrs.com (mail1.wrs.com [147.11.3.146]) by mx.groups.io with SMTP id smtpd.web12.6262.1647690406440537405 for ; Sat, 19 Mar 2022 04:46:46 -0700 Authentication-Results: mx.groups.io; dkim=missing; spf=permerror, err=parse error for token &{10 18 %{ir}.%{v}.%{d}.spf.has.pphosted.com}: invalid domain name (domain: windriver.com, ip: 147.11.3.146, mailfrom: david.reyna@windriver.com) Received: from mail.windriver.com (mail.wrs.com [147.11.1.11]) by mail1.wrs.com (8.15.2/8.15.2) with ESMTPS id 22JBkihB008083 (version=TLSv1.1 cipher=DHE-RSA-AES256-SHA bits=256 verify=FAIL) for ; Sat, 19 Mar 2022 04:46:44 -0700 Received: from ala-exchng01.corp.ad.wrs.com (ala-exchng01.corp.ad.wrs.com [147.11.82.252]) by mail.windriver.com (8.15.2/8.15.2) with ESMTPS id 22JBkhlq002675 (version=TLSv1.2 cipher=AES256-GCM-SHA384 bits=256 verify=FAIL) for ; Sat, 19 Mar 2022 04:46:43 -0700 (PDT) Received: from ala-exchng01.corp.ad.wrs.com (147.11.82.252) by ala-exchng01.corp.ad.wrs.com (147.11.82.252) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2242.12; Sat, 19 Mar 2022 04:46:42 -0700 Received: from ala-lpggp2.wrs.com (147.11.105.123) by ala-exchng01.corp.ad.wrs.com (147.11.82.252) with Microsoft SMTP Server id 15.1.2242.12 via Frontend Transport; Sat, 19 Mar 2022 04:46:42 -0700 From: David Reyna To: Subject: [PATCH] toaster: detect when bitbake crashed Date: Sat, 19 Mar 2022 04:46:42 -0700 Message-ID: 
<20220319114642.91262-1-david.reyna@windriver.com>
X-Mailer: git-send-email 2.17.1
MIME-Version: 1.0
List-Id: 
X-Webhook-Received: from li982-79.members.linode.com [45.33.32.79] by aws-us-west-2-korg-lkml-1.web.codeaurora.org with HTTPS for ; Sat, 19 Mar 2022 11:46:47 -0000
X-Groupsio-URL: https://lists.yoctoproject.org/g/toaster/message/5784

From: David Reyna 

Add a polling check on tracebacks in a build's log. This can
for example indicate that bitbake crashed, which would stop
the event stream that Toaster normally uses to detect build errors.

[YOCTO #14085]

Signed-off-by: David Reyna 
---
 .../management/commands/runbuilds.py          | 97 ++++++++++++++++++-
 1 file changed, 94 insertions(+), 2 deletions(-)

diff --git a/bitbake/lib/toaster/bldcontrol/management/commands/runbuilds.py b/bitbake/lib/toaster/bldcontrol/management/commands/runbuilds.py
index 19f659ec41..834e32b36f 100644
--- a/bitbake/lib/toaster/bldcontrol/management/commands/runbuilds.py
+++ b/bitbake/lib/toaster/bldcontrol/management/commands/runbuilds.py
@@ -180,6 +180,90 @@ class Command(BaseCommand):
         except Exception as e:
             logger.warning("runbuilds: schedule exception %s" % str(e))
 
+    # Test to see if a build prematurely died due to a bitbake crash
+    def check_dead_builds(self):
+        """Fail in-progress build requests whose log ends in a traceback.
+
+        Scans the last 50 lines of each in-progress build's toaster_ui.log.
+        A traceback that is not followed by a new build start or by an
+        earlier auto-stop note is treated as a bitbake crash: the stale
+        bitbake.lock and bitbake.sock are removed, a BRError is saved and
+        the request is set to REQ_FAILED. self.cleanup() runs once at the
+        end if any request was stopped. Errors are logged, never raised.
+        """
+        from collections import deque
+
+        do_cleanup = False
+        try:
+            for br in BuildRequest.objects.filter(state=BuildRequest.REQ_INPROGRESS):
+                # Get the build directory
+                if br.project.builddir:
+                    builddir = br.project.builddir
+                else:
+                    builddir = '%s-toaster-%d' % (br.environment.builddir, br.project.id)
+                # Check log to see if there is a recent traceback
+                toaster_ui_log = os.path.join(builddir, 'toaster_ui.log')
+                try:
+                    # Read the tail in-process: no shell quoting problems
+                    # with the path and no scratch file left in builddir
+                    with open(toaster_ui_log, 'r', errors='replace') as log_fd:
+                        log_tail = deque(log_fd, maxlen=50)
+                except OSError:
+                    # No readable log (yet); keep checking the other builds
+                    continue
+                traceback_text = ''
+                is_traceback = False
+                for line in log_tail:
+                    if line.startswith('Traceback (most recent call last):'):
+                        traceback_text = line
+                        is_traceback = True
+                    elif line.startswith('NOTE: ToasterUI waiting for events'):
+                        # Ignore any traceback before new build start
+                        traceback_text = ''
+                        is_traceback = False
+                    elif line.startswith('Note: Toaster traceback auto-stop'):
+                        # Ignore any traceback before this previous traceback catch
+                        traceback_text = ''
+                        is_traceback = False
+                    elif is_traceback:
+                        traceback_text += line
+                # Test the results
+                is_stop = False
+                if is_traceback:
+                    # Found a traceback
+                    errtype = 'Bitbake crash'
+                    errmsg = 'Bitbake crash\n' + traceback_text
+                    state = BuildRequest.REQ_FAILED
+                    # Clean up bitbake files. bitbake.sock is normally a socket,
+                    # which os.path.isfile() never matches, so just try the remove
+                    for stale in ('bitbake.lock', 'bitbake.sock'):
+                        try:
+                            os.remove(os.path.join(builddir, stale))
+                        except FileNotFoundError:
+                            pass
+                    # Add note to ignore this traceback on next check
+                    with open(toaster_ui_log, 'a') as log_fd:
+                        log_fd.write('Note: Toaster traceback auto-stop\n')
+                    is_stop = True
+                # Add more tests here
+                # Stop the build request?
+                if is_stop:
+                    brerror = BRError(
+                        req = br,
+                        errtype = errtype,
+                        errmsg = errmsg,
+                        traceback = traceback_text,
+                    )
+                    brerror.save()
+                    br.state = state
+                    br.save()
+                    do_cleanup = True
+            # Do cleanup
+            if do_cleanup:
+                self.cleanup()
+        except Exception as e:
+            logger.error("runbuilds: Error in check_dead_builds %s" % e)
+
     def handle(self, **options):
         pidfile_path = os.path.join(os.environ.get("BUILDDIR", "."),
                                     ".runbuilds.pid")
@@ -187,10 +271,18 @@ class Command(BaseCommand):
         with open(pidfile_path, 'w') as pidfile:
             pidfile.write("%s" % os.getpid())
 
+        # Clean up any stale/failed builds from previous Toaster run
         self.runbuild()
 
         signal.signal(signal.SIGUSR1, lambda sig, frame: None)
 
         while True:
-            signal.pause()
-            self.runbuild()
+            # sigtimedwait() returns one struct_siginfo, or None on timeout
+            siginfo = signal.sigtimedwait([signal.SIGUSR1], 5)
+            if siginfo is not None:
+                # Woken by SIGUSR1: process the build queue once
+                self.runbuild()
+            else:
+                # Check for build exceptions
+                self.check_dead_builds()
+