diff mbox series

[6/6] siggen/runqueue: Allow skipping of rehash if not present on hashserve

Message ID 20240524151200.2449512-6-richard.purdie@linuxfoundation.org
State New
Headers show
Series [1/6] siggen/runqueue: Store whether the hash was present on the hashserver or not | expand

Commit Message

Richard Purdie May 24, 2024, 3:12 p.m. UTC
If a unihash isn't present on the hashserver, it is extremely unlikely that
any subsequent unihashes would be present either. Use this information to
skip unihash checks in that low-match scenario, allowing the build to
proceed more quickly. This significantly improves performance in the
low-match case.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
 lib/bb/runqueue.py | 35 ++++++++++++++++++++++++++----
 lib/bb/siggen.py   | 54 +++++++++++++++++++++++++---------------------
 2 files changed, 61 insertions(+), 28 deletions(-)
diff mbox series

Patch

diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
index 5626ffcccb..0fb9edc77e 100644
--- a/lib/bb/runqueue.py
+++ b/lib/bb/runqueue.py
@@ -472,6 +472,7 @@  class RunQueueData:
         self.reset()
 
     def reset(self):
+        self.hashserve_valid = set()
         self.runtaskentries = {}
 
     def runq_depends_names(self, ids):
@@ -1287,11 +1288,23 @@  class RunQueueData:
                     # get_taskhash for a given tid *must* be called before get_unihash* below
                     self.runtaskentries[tid].hash = bb.parse.siggen.get_taskhash(tid, self.runtaskentries[tid].depends, self.dataCaches)
                     ready.add(tid)
-            unihashes = bb.parse.siggen.get_unihashes(ready)
+
+            ready2 = set()
+            for tid in ready:
+                if self.runtaskentries[tid].depends.issubset(self.hashserve_valid):
+                    ready2.add(tid)
+            unihashes = bb.parse.siggen.get_unihashes(ready2)
             for tid in ready:
                 dealtwith.add(tid)
                 todeal.remove(tid)
-                self.runtaskentries[tid].unihash = unihashes[tid]
+                if tid in unihashes:
+                    self.runtaskentries[tid].unihash, present = unihashes[tid]
+                    if present:
+                        self.hashserve_valid.add(tid)
+                else:
+                    taskhash = self.runtaskentries[tid].hash
+                    bb.parse.siggen.set_unihash(tid, taskhash, False)
+                    self.runtaskentries[tid].unihash = taskhash
 
             bb.event.check_for_interrupts(self.cooker.data)
 
@@ -2585,13 +2598,25 @@  class RunQueueExecute:
                 # get_taskhash for a given tid *must* be called before get_unihash* below
                 ready[tid] = bb.parse.siggen.get_taskhash(tid, self.rqdata.runtaskentries[tid].depends, self.rqdata.dataCaches)
 
-            unihashes = bb.parse.siggen.get_unihashes(ready.keys())
+            toquery = set()
+            for tid in ready:
+                if self.rqdata.runtaskentries[tid].depends.issubset(self.rqdata.hashserve_valid):
+                    toquery.add(tid)
+
+            unihashes = bb.parse.siggen.get_unihashes(toquery)
 
             for tid in ready:
                 orighash = self.rqdata.runtaskentries[tid].hash
                 newhash = ready[tid]
                 origuni = self.rqdata.runtaskentries[tid].unihash
-                newuni = unihashes[tid]
+
+                if tid in unihashes:
+                    newuni, present = unihashes[tid]
+                    if present:
+                        self.rqdata.hashserve_valid.add(tid)
+                else:
+                    bb.parse.siggen.set_unihash(tid, origuni, False)
+                    newuni = origuni
 
                 # FIXME, need to check it can come from sstate at all for determinism?
                 remapped = False
@@ -2623,6 +2648,8 @@  class RunQueueExecute:
         if (endtime-starttime > 60):
             hashequiv_logger.verbose("Rehash loop took more than 60s: %s" % (endtime-starttime))
 
+        bb.parse.siggen.save_unitaskhashes()
+
         if changed:
             for mc in self.rq.worker:
                 RunQueue.send_pickled_data(self.rq.worker[mc].process, bb.parse.siggen.get_taskhashes(), "newtaskhashes")
diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py
index e8fe806c36..56528ea10a 100644
--- a/lib/bb/siggen.py
+++ b/lib/bb/siggen.py
@@ -112,16 +112,19 @@  class SignatureGenerator(object):
                 self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag
 
     def get_cached_unihash(self, tid):
-        return None
-
-    def get_unihash(self, tid):
-        unihash = self.get_cached_unihash(tid)
-        if unihash:
+        return None, False
+
+    def get_unihash(self, tid, presenceinfo=False):
+        unihash, present = self.get_cached_unihash(tid)
+        if not unihash:
+            unihash, present = self.taskhash[tid], False
+        if presenceinfo:
+            return unihash, present
+        else:
             return unihash
-        return self.taskhash[tid]
 
     def get_unihashes(self, tids):
-        return {tid: self.get_unihash(tid) for tid in tids}
+        return {tid: self.get_unihash(tid, True) for tid in tids}
 
     def prep_taskhash(self, tid, deps, dataCaches):
         return
@@ -624,7 +627,7 @@  class SignatureGeneratorUniHashMixIn(object):
             # If a unique hash is reported, use it as the stampfile hash. This
             # ensures that if a task won't be re-run if the taskhash changes,
             # but it would result in the same output hash
-            unihash = self._get_unihash(tid)
+            unihash, _ = self._get_unihash(tid)
             if unihash is not None:
                 return unihash
 
@@ -638,17 +641,17 @@  class SignatureGeneratorUniHashMixIn(object):
 
     def _get_unihash(self, tid, checkkey=None):
         if tid not in self.tidtopn:
-            return None
+            return None, False
         (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
         key = mc + ":" + self.tidtopn[tid] + ":" + taskname
         if key not in self.hashserv_cache:
-            return None
+            return None, False
         if not checkkey:
             checkkey = self.taskhash[tid]
         (key, unihash, present) = self.hashserv_cache[key]
         if key != checkkey:
-            return None
-        return unihash
+            return None, False
+        return unihash, present
 
     def get_cached_unihash(self, tid):
         taskhash = self.taskhash[tid]
@@ -656,16 +659,16 @@  class SignatureGeneratorUniHashMixIn(object):
         # If its not a setscene task we can return
         if self.setscenetasks and tid not in self.setscenetasks:
             self.unihash[tid] = None
-            return taskhash
+            return taskhash, True
 
         # TODO: This cache can grow unbounded. It probably only needs to keep
         # for each task
-        unihash =  self._get_unihash(tid)
+        unihash, present =  self._get_unihash(tid)
         if unihash is not None:
             self.unihash[tid] = unihash
-            return unihash
+            return unihash, present
 
-        return None
+        return None, False
 
     def _get_method(self, tid):
         method = self.method
@@ -703,8 +706,10 @@  class SignatureGeneratorUniHashMixIn(object):
 
         return result
 
-    def get_unihash(self, tid):
-        return self.get_unihashes([tid])[tid]
+    def get_unihash(self, tid, presenceinfo=False):
+        if presenceinfo:
+            return self.get_unihashes([tid])[tid]
+        return self.get_unihashes([tid])[tid][0]
 
     def get_unihashes(self, tids):
         """
@@ -716,9 +721,9 @@  class SignatureGeneratorUniHashMixIn(object):
         query_result = {}
 
         for tid in tids:
-            unihash = self.get_cached_unihash(tid)
+            unihash, present = self.get_cached_unihash(tid)
             if unihash:
-                result[tid] = unihash
+                result[tid] = (unihash, present)
             else:
                 queries[tid] = (self._get_method(tid), self.taskhash[tid])
 
@@ -754,14 +759,15 @@  class SignatureGeneratorUniHashMixIn(object):
                 # so it is reported it at debug level 2. If they differ, that
                 # is much more interesting, so it is reported at debug level 1
                 hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
-                self.set_unihash(tid, unihash, True)
+                present = True
             else:
                 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
                 unihash = taskhash
-                self.set_unihash(tid, unihash, False)
+                present = False
 
+            self.set_unihash(tid, unihash, present)
             self.unihash[tid] = unihash
-            result[tid] = unihash
+            result[tid] = (unihash, present)
 
         return result
 
@@ -784,7 +790,7 @@  class SignatureGeneratorUniHashMixIn(object):
             return
 
         # Sanity checks
-        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
+        cache_unihash, _ = self._get_unihash(tid, checkkey=taskhash)
         if cache_unihash is None:
             bb.fatal('%s not in unihash cache. Please report this error' % key)