diff mbox series

[6/6] siggen/runqueue: Allow skipping of rehash if not present on hashserve

Message ID 20240524151200.2449512-6-richard.purdie@linuxfoundation.org
State New
Headers show
Series [1/6] siggen/runqueue: Store whether the hash was present on the hashserver or not | expand

Commit Message

Richard Purdie May 24, 2024, 3:12 p.m. UTC
If a unihash isn't present on the hashserver, it is extremely unlikely that
any subsequent unihashes would be present either. Use this information to
skip unihash checks in that low-match scenario, allowing the build to
proceed more quickly. This significantly improves performance in the
low-match case.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
 lib/bb/runqueue.py | 35 ++++++++++++++++++++++++++----
 lib/bb/siggen.py   | 54 +++++++++++++++++++++++++---------------------
 2 files changed, 61 insertions(+), 28 deletions(-)
diff mbox series

Patch

diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
index 5626ffcccb..0fb9edc77e 100644
--- a/lib/bb/runqueue.py
+++ b/lib/bb/runqueue.py
@@ -472,6 +472,7 @@  class RunQueueData:
         self.reset()
 
     def reset(self):
+        self.hashserve_valid = set()
         self.runtaskentries = {}
 
     def runq_depends_names(self, ids):
@@ -1287,11 +1288,23 @@  class RunQueueData:
                     # get_taskhash for a given tid *must* be called before get_unihash* below
                     self.runtaskentries[tid].hash = bb.parse.siggen.get_taskhash(tid, self.runtaskentries[tid].depends, self.dataCaches)
                     ready.add(tid)
-            unihashes = bb.parse.siggen.get_unihashes(ready)
+
+            ready2 = set()
+            for tid in ready:
+                if self.runtaskentries[tid].depends.issubset(self.hashserve_valid):
+                    ready2.add(tid)
+            unihashes = bb.parse.siggen.get_unihashes(ready2)
             for tid in ready:
                 dealtwith.add(tid)
                 todeal.remove(tid)
-                self.runtaskentries[tid].unihash = unihashes[tid]
+                if tid in unihashes:
+                    self.runtaskentries[tid].unihash, present = unihashes[tid]
+                    if present:
+                        self.hashserve_valid.add(tid)
+                else:
+                    taskhash = self.runtaskentries[tid].hash
+                    bb.parse.siggen.set_unihash(tid, taskhash, False)
+                    self.runtaskentries[tid].unihash = taskhash
 
             bb.event.check_for_interrupts(self.cooker.data)
 
@@ -2585,13 +2598,25 @@  class RunQueueExecute:
                 # get_taskhash for a given tid *must* be called before get_unihash* below
                 ready[tid] = bb.parse.siggen.get_taskhash(tid, self.rqdata.runtaskentries[tid].depends, self.rqdata.dataCaches)
 
-            unihashes = bb.parse.siggen.get_unihashes(ready.keys())
+            toquery = set()
+            for tid in ready:
+                if self.rqdata.runtaskentries[tid].depends.issubset(self.rqdata.hashserve_valid):
+                    toquery.add(tid)
+
+            unihashes = bb.parse.siggen.get_unihashes(toquery)
 
             for tid in ready:
                 orighash = self.rqdata.runtaskentries[tid].hash
                 newhash = ready[tid]
                 origuni = self.rqdata.runtaskentries[tid].unihash
-                newuni = unihashes[tid]
+
+                if tid in unihashes:
+                    newuni, present = unihashes[tid]
+                    if present:
+                        self.rqdata.hashserve_valid.add(tid)
+                else:
+                    bb.parse.siggen.set_unihash(tid, origuni, False)
+                    newuni = origuni
 
                 # FIXME, need to check it can come from sstate at all for determinism?
                 remapped = False
@@ -2623,6 +2648,8 @@  class RunQueueExecute:
         if (endtime-starttime > 60):
             hashequiv_logger.verbose("Rehash loop took more than 60s: %s" % (endtime-starttime))
 
+        bb.parse.siggen.save_unitaskhashes()
+
         if changed:
             for mc in self.rq.worker:
                 RunQueue.send_pickled_data(self.rq.worker[mc].process, bb.parse.siggen.get_taskhashes(), "newtaskhashes")
diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py
index e8fe806c36..56528ea10a 100644
--- a/lib/bb/siggen.py
+++ b/lib/bb/siggen.py
@@ -112,16 +112,19 @@  class SignatureGenerator(object):
                 self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag
 
     def get_cached_unihash(self, tid):
-        return None
-
-    def get_unihash(self, tid):
-        unihash = self.get_cached_unihash(tid)
-        if unihash:
+        return None, False
+
+    def get_unihash(self, tid, presenceinfo=False):
+        unihash, present = self.get_cached_unihash(tid)
+        if not unihash:
+            unihash, present = self.taskhash[tid], False
+        if presenceinfo:
+            return unihash, present
+        else:
             return unihash
-        return self.taskhash[tid]
 
     def get_unihashes(self, tids):
-        return {tid: self.get_unihash(tid) for tid in tids}
+        return {tid: self.get_unihash(tid, True) for tid in tids}
 
     def prep_taskhash(self, tid, deps, dataCaches):
         return
@@ -624,7 +627,7 @@  class SignatureGeneratorUniHashMixIn(object):
             # If a unique hash is reported, use it as the stampfile hash. This
             # ensures that if a task won't be re-run if the taskhash changes,
             # but it would result in the same output hash
-            unihash = self._get_unihash(tid)
+            unihash, _ = self._get_unihash(tid)
             if unihash is not None:
                 return unihash
 
@@ -638,17 +641,17 @@  class SignatureGeneratorUniHashMixIn(object):
 
     def _get_unihash(self, tid, checkkey=None):
         if tid not in self.tidtopn:
-            return None
+            return None, False
         (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
         key = mc + ":" + self.tidtopn[tid] + ":" + taskname
         if key not in self.hashserv_cache:
-            return None
+            return None, False
         if not checkkey:
             checkkey = self.taskhash[tid]
         (key, unihash, present) = self.hashserv_cache[key]
         if key != checkkey:
-            return None
-        return unihash
+            return None, False
+        return unihash, present
 
     def get_cached_unihash(self, tid):
         taskhash = self.taskhash[tid]
@@ -656,16 +659,16 @@  class SignatureGeneratorUniHashMixIn(object):
         # If its not a setscene task we can return
         if self.setscenetasks and tid not in self.setscenetasks:
             self.unihash[tid] = None
-            return taskhash
+            return taskhash, True
 
         # TODO: This cache can grow unbounded. It probably only needs to keep
         # for each task
-        unihash =  self._get_unihash(tid)
+        unihash, present =  self._get_unihash(tid)
         if unihash is not None:
             self.unihash[tid] = unihash
-            return unihash
+            return unihash, present
 
-        return None
+        return None, False
 
     def _get_method(self, tid):
         method = self.method
@@ -703,8 +706,10 @@  class SignatureGeneratorUniHashMixIn(object):
 
         return result
 
-    def get_unihash(self, tid):
-        return self.get_unihashes([tid])[tid]
+    def get_unihash(self, tid, presenceinfo=False):
+        if presenceinfo:
+            return self.get_unihashes([tid])[tid]
+        return self.get_unihashes([tid])[tid][0]
 
     def get_unihashes(self, tids):
         """
@@ -716,9 +721,9 @@  class SignatureGeneratorUniHashMixIn(object):
         query_result = {}
 
         for tid in tids:
-            unihash = self.get_cached_unihash(tid)
+            unihash, present = self.get_cached_unihash(tid)
             if unihash:
-                result[tid] = unihash
+                result[tid] = (unihash, present)
             else:
                 queries[tid] = (self._get_method(tid), self.taskhash[tid])
 
@@ -754,14 +759,15 @@  class SignatureGeneratorUniHashMixIn(object):
                 # so it is reported it at debug level 2. If they differ, that
                 # is much more interesting, so it is reported at debug level 1
                 hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
-                self.set_unihash(tid, unihash, True)
+                present = True
             else:
                 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
                 unihash = taskhash
-                self.set_unihash(tid, unihash, False)
+                present = False
 
+            self.set_unihash(tid, unihash, present)
             self.unihash[tid] = unihash
-            result[tid] = unihash
+            result[tid] = (unihash, present)
 
         return result
 
@@ -784,7 +790,7 @@  class SignatureGeneratorUniHashMixIn(object):
             return
 
         # Sanity checks
-        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
+        cache_unihash, _ = self._get_unihash(tid, checkkey=taskhash)
         if cache_unihash is None:
             bb.fatal('%s not in unihash cache. Please report this error' % key)