
runqueue: Avoid dumpsigs idle loop blocking

Message ID 20241104174630.1274186-1-richard.purdie@linuxfoundation.org
State Accepted, archived
Commit e66f1b643b4b77404ba31f2704cda5af9bf00a57
Series runqueue: Avoid dumpsigs idle loop blocking

Commit Message

Richard Purdie Nov. 4, 2024, 5:46 p.m. UTC
We're seeing some failures on hosts where slow "idle" loop iterations are
causing bitbake server timeouts. These seem to happen particularly in the
dump_signatures() function within runqueue.

That isn't entirely surprising since it creates a pool of worker processes to
execute work and, at best, can take around 10s to execute and return control
back to the main loop. On a slow system it is understandable this can take
longer, particularly as these functions generate large amounts of IO.
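
As an illustration only (hypothetical helper names, not BitBake's actual code),
the old behaviour has roughly this blocking shape: the call does not return
until every worker has exited, so the server's idle loop makes no progress in
the meantime.

    import multiprocessing

    def dump_blocking(chunks, worker):
        # Launch one worker process per chunk, then wait for all of them
        # before returning: the idle loop that called us is blocked for
        # the whole duration.
        procs = []
        for chunk in chunks:
            p = multiprocessing.Process(target=worker, args=(chunk,))
            p.start()
            procs.append(p)
        for p in procs:
            p.join()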

Since the work is being done in separate processes, we can launch them, return
to the idle loop and check on the results periodically as they complete.
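
A minimal sketch of that pattern, again with hypothetical names
(launch_workers/poll_workers): start the processes once, return immediately,
and let later idle-loop iterations poll is_alive() until everything has
finished. In the patch itself dump_signatures() plays both roles, returning
1.0 to ask to be called again and False once the work is complete.

    import multiprocessing

    def launch_workers(chunks, worker):
        # Start one process per chunk and return straight away so the
        # idle loop can keep servicing other events.
        procs = []
        for chunk in chunks:
            p = multiprocessing.Process(target=worker, args=(chunk,))
            p.start()
            procs.append(p)
        return procs

    def poll_workers(procs):
        # Called on later idle-loop iterations: drop finished workers and
        # report whether another poll is still needed.
        procs[:] = [p for p in procs if p.is_alive()]
        return bool(procs)  # True -> poll again later, False -> all done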

This should hopefully address some of the remaining timeout issues we see on
the autobuilder in oe-selftest sstate tests.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
 lib/bb/runqueue.py | 69 ++++++++++++++++++++++++++++------------------
 1 file changed, 42 insertions(+), 27 deletions(-)

Patch

diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
index 3462ed4457..bee315c36d 100644
--- a/lib/bb/runqueue.py
+++ b/lib/bb/runqueue.py
@@ -128,6 +128,7 @@  class RunQueueStats:
 # runQueue state machine
 runQueuePrepare = 2
 runQueueSceneInit = 3
+runQueueDumpSigs = 4
 runQueueRunning = 6
 runQueueFailed = 7
 runQueueCleanUp = 8
@@ -1588,13 +1589,18 @@  class RunQueue:
             self.rqdata.init_progress_reporter.next_stage()
             self.rqexe = RunQueueExecute(self)
 
-            dump = self.cooker.configuration.dump_signatures
-            if dump:
+            dumpsigs = self.cooker.configuration.dump_signatures
+            if dumpsigs:
                 self.rqdata.init_progress_reporter.finish()
-                if 'printdiff' in dump:
+                if 'printdiff' in dumpsigs:
                     invalidtasks = self.print_diffscenetasks()
-                self.dump_signatures(dump)
-                if 'printdiff' in dump:
+                self.state = runQueueDumpSigs
+
+        if self.state is runQueueDumpSigs:
+            dumpsigs = self.cooker.configuration.dump_signatures
+            retval = self.dump_signatures(dumpsigs)
+            if retval is False:
+                if 'printdiff' in dumpsigs:
                     self.write_diffscenetasks(invalidtasks)
                 self.state = runQueueComplete
 
@@ -1686,33 +1692,42 @@  class RunQueue:
             bb.parse.siggen.dump_sigtask(taskfn, taskname, dataCaches[mc].stamp[taskfn], True)
 
     def dump_signatures(self, options):
-        if bb.cooker.CookerFeatures.RECIPE_SIGGEN_INFO not in self.cooker.featureset:
-            bb.fatal("The dump signatures functionality needs the RECIPE_SIGGEN_INFO feature enabled")
-
-        bb.note("Writing task signature files")
-
-        max_process = int(self.cfgData.getVar("BB_NUMBER_PARSE_THREADS") or os.cpu_count() or 1)
-        def chunkify(l, n):
-            return [l[i::n] for i in range(n)]
-        tids = chunkify(list(self.rqdata.runtaskentries), max_process)
-        # We cannot use the real multiprocessing.Pool easily due to some local data
-        # that can't be pickled. This is a cheap multi-process solution.
-        launched = []
-        while tids:
-            if len(launched) < max_process:
-                p = Process(target=self._rq_dump_sigtid, args=(tids.pop(), ))
+        if not hasattr(self, "dumpsigs_launched"):
+            if bb.cooker.CookerFeatures.RECIPE_SIGGEN_INFO not in self.cooker.featureset:
+                bb.fatal("The dump signatures functionality needs the RECIPE_SIGGEN_INFO feature enabled")
+
+            bb.note("Writing task signature files")
+
+            max_process = int(self.cfgData.getVar("BB_NUMBER_PARSE_THREADS") or os.cpu_count() or 1)
+            def chunkify(l, n):
+                return [l[i::n] for i in range(n)]
+            dumpsigs_tids = chunkify(list(self.rqdata.runtaskentries), max_process)
+
+            # We cannot use the real multiprocessing.Pool easily due to some local data
+            # that can't be pickled. This is a cheap multi-process solution.
+            self.dumpsigs_launched = []
+
+            for tid in dumpsigs_tids:
+                p = Process(target=self._rq_dump_sigtid, args=(tid, ))
                 p.start()
-                launched.append(p)
-            for q in launched:
-                # The finished processes are joined when calling is_alive()
-                if not q.is_alive():
-                    launched.remove(q)
-        for p in launched:
+                self.dumpsigs_launched.append(p)
+
+            return 1.0
+
+        for q in self.dumpsigs_launched:
+            # The finished processes are joined when calling is_alive()
+            if not q.is_alive():
+                self.dumpsigs_launched.remove(q)
+
+        if self.dumpsigs_launched:
+            return 1.0
+
+        for p in self.dumpsigs_launched:
                 p.join()
 
         bb.parse.siggen.dump_sigs(self.rqdata.dataCaches, options)
 
-        return
+        return False
 
     def print_diffscenetasks(self):
         def get_root_invalid_tasks(task, taskdepends, valid, noexec, visited_invalid):