diff mbox series

[1/6] siggen/runqueue: Store whether the hash was present on the hashserver or not

Message ID 20240524151200.2449512-1-richard.purdie@linuxfoundation.org
State New
Headers show
Series [1/6] siggen/runqueue: Store whether the hash was present on the hashserver or not | expand

Commit Message

Richard Purdie May 24, 2024, 3:11 p.m. UTC
We need to be able to know if a unihash came from the hashequivalence
server or not. Add this information to the cache data so we can use
it in later code changes.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
 lib/bb/runqueue.py |  2 +-
 lib/bb/siggen.py   | 32 ++++++++++++++++----------------
 2 files changed, 17 insertions(+), 17 deletions(-)

Comments

Richard Purdie May 28, 2024, 10:23 a.m. UTC | #1
On Fri, 2024-05-24 at 16:12 +0100, Richard Purdie via lists.openembedded.org wrote:
> If a unihash isn't present on the hashserver, it is extremely unlikely any
> following unihashes would be either. Use this information to skip unihash
> checks in that scenario where there is low matching to allow the build
> to proceed more quickly. This significantly improves performance on the
> low match scenario.
> 
> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
> ---
>  lib/bb/runqueue.py | 35 ++++++++++++++++++++++++++----
>  lib/bb/siggen.py   | 54 +++++++++++++++++++++++++---------------------
>  2 files changed, 61 insertions(+), 28 deletions(-)

This patch isn't stable, and there also appear to be problems in the
hashserve client parallel code, with connection closed errors.

I'm going to split the series: first get the basic bits working
with MAX_PARALLEL=1, then look at the parallel queries and the skips
separately.

I will note that the performance impact of these problems on the testing
infrastructure is dire, but it is only really visible when the logging
patch in this series is applied.

Cheers,

Richard
diff mbox series

Patch

diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
index 6b43f303d5..6c2034176e 100644
--- a/lib/bb/runqueue.py
+++ b/lib/bb/runqueue.py
@@ -2530,7 +2530,7 @@  class RunQueueExecute:
                 for hashtid in torehash:
                     hashequiv_logger.verbose("Task %s unihash changed to %s" % (hashtid, unihash))
                     self.rqdata.runtaskentries[hashtid].unihash = unihash
-                    bb.parse.siggen.set_unihash(hashtid, unihash)
+                    bb.parse.siggen.set_unihash(hashtid, unihash, True)
                     toprocess.add(hashtid)
                 if torehash:
                     # Need to save after set_unihash above
diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py
index 03dfda6f3c..e8fe806c36 100644
--- a/lib/bb/siggen.py
+++ b/lib/bb/siggen.py
@@ -80,7 +80,7 @@  class SignatureGenerator(object):
         self.runtaskdeps = {}
         self.file_checksum_values = {}
         self.taints = {}
-        self.unitaskhashes = {}
+        self.hashserv_cache = {}
         self.tidtopn = {}
         self.setscenetasks = set()
 
@@ -184,19 +184,19 @@  class SignatureGenerator(object):
         return
 
     def get_taskdata(self):
-        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)
+        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.hashserv_cache, self.tidtopn, self.setscenetasks)
 
     def set_taskdata(self, data):
-        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data
+        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.hashserv_cache, self.tidtopn, self.setscenetasks = data
 
     def reset(self, data):
         self.__init__(data)
 
     def get_taskhashes(self):
-        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn
+        return self.taskhash, self.unihash, self.hashserv_cache, self.tidtopn
 
     def set_taskhashes(self, hashes):
-        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes
+        self.taskhash, self.unihash, self.hashserv_cache, self.tidtopn = hashes
 
     def save_unitaskhashes(self):
         return
@@ -238,8 +238,8 @@  class SignatureGeneratorBasic(SignatureGenerator):
         else:
             self.checksum_cache = None
 
-        self.unihash_cache = bb.cache.SimpleCache("3")
-        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
+        self.unihash_cache = bb.cache.SimpleCache("4")
+        self.hashserv_cache = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
         self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
         self.tidtopn = {}
 
@@ -416,7 +416,7 @@  class SignatureGeneratorBasic(SignatureGenerator):
             bb.fetch2.fetcher_parse_done()
 
     def save_unitaskhashes(self):
-        self.unihash_cache.save(self.unitaskhashes)
+        self.unihash_cache.save(self.hashserv_cache)
 
     def copy_unitaskhashes(self, targetdir):
         self.unihash_cache.copyfile(targetdir)
@@ -630,10 +630,10 @@  class SignatureGeneratorUniHashMixIn(object):
 
         return super().get_stampfile_hash(tid)
 
-    def set_unihash(self, tid, unihash):
+    def set_unihash(self, tid, unihash, present):
         (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
         key = mc + ":" + self.tidtopn[tid] + ":" + taskname
-        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
+        self.hashserv_cache[key] = (self.taskhash[tid], unihash, present)
         self.unihash[tid] = unihash
 
     def _get_unihash(self, tid, checkkey=None):
@@ -641,11 +641,11 @@  class SignatureGeneratorUniHashMixIn(object):
             return None
         (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
         key = mc + ":" + self.tidtopn[tid] + ":" + taskname
-        if key not in self.unitaskhashes:
+        if key not in self.hashserv_cache:
             return None
         if not checkkey:
             checkkey = self.taskhash[tid]
-        (key, unihash) = self.unitaskhashes[key]
+        (key, unihash, present) = self.hashserv_cache[key]
         if key != checkkey:
             return None
         return unihash
@@ -754,12 +754,12 @@  class SignatureGeneratorUniHashMixIn(object):
                 # so it is reported it at debug level 2. If they differ, that
                 # is much more interesting, so it is reported at debug level 1
                 hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
+                self.set_unihash(tid, unihash, True)
             else:
                 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
                 unihash = taskhash
+                self.set_unihash(tid, unihash, False)
 
-
-            self.set_unihash(tid, unihash)
             self.unihash[tid] = unihash
             result[tid] = unihash
 
@@ -835,7 +835,7 @@  class SignatureGeneratorUniHashMixIn(object):
                 if new_unihash != unihash:
                     hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                     bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
-                    self.set_unihash(tid, new_unihash)
+                    self.set_unihash(tid, new_unihash, True)
                     d.setVar('BB_UNIHASH', new_unihash)
                 else:
                     hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
@@ -875,7 +875,7 @@  class SignatureGeneratorUniHashMixIn(object):
                 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
             elif finalunihash == wanted_unihash:
                 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
-                self.set_unihash(tid, finalunihash)
+                self.set_unihash(tid, finalunihash, True)
                 return True
             else:
                 # TODO: What to do here?