diff mbox series

[scarthgap,2.8,1/2] codeparser/data: Ensure module function contents changing is accounted for

Message ID 4bf332ccac283ca3440e81d8c781fcc23fe10b98.1721310419.git.steve@sakoman.com
State New
Headers show
Series [scarthgap,2.8,1/2] codeparser/data: Ensure module function contents changing is accounted for | expand

Commit Message

Steve Sakoman July 18, 2024, 1:48 p.m. UTC
From: Richard Purdie <richard.purdie@linuxfoundation.org>

Currently, if the contents of a pylib function change, the taskhash
remains unchanged since we assume the functions have stable output.
This is probably a poor assumption, so take the code of the function
into account in the taskhashes. This avoids certain frustrating build
failures we've been seeing in automated testing.

To make this work we have to add an extra entry to the python code
parsing cache so that we can store the hashed function contents for
efficiency, since in the python module case that hash isn't used as the
key to the cache.

The cache version changes since we're adding data to the cache.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
(cherry picked from commit b2c3438ebe62793ebabe2c282534893908d520b4)
Signed-off-by: Steve Sakoman <steve@sakoman.com>
---
 lib/bb/codeparser.py | 28 +++++++++++++++++-----------
 lib/bb/data.py       |  2 +-
 2 files changed, 18 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py
index 2e8b7ced3..0b2890cf8 100644
--- a/lib/bb/codeparser.py
+++ b/lib/bb/codeparser.py
@@ -82,14 +82,14 @@  def add_module_functions(fn, functions, namespace):
             if e in functions:
                 execs.remove(e)
                 execs.add(namespace + "." + e)
-        modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy()]
+        modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy(), parser.extra]
         #bb.warn("%s: %s\nRefs:%s Execs: %s %s %s" % (name, fn, parser.references, parser.execs, parser.var_execs, parser.contains))
 
 def update_module_dependencies(d):
     for mod in modulecode_deps:
         excludes = set((d.getVarFlag(mod, "vardepsexclude") or "").split())
         if excludes:
-            modulecode_deps[mod] = [modulecode_deps[mod][0] - excludes, modulecode_deps[mod][1] - excludes, modulecode_deps[mod][2] - excludes, modulecode_deps[mod][3]]
+            modulecode_deps[mod] = [modulecode_deps[mod][0] - excludes, modulecode_deps[mod][1] - excludes, modulecode_deps[mod][2] - excludes, modulecode_deps[mod][3], modulecode_deps[mod][4]]
 
 # A custom getstate/setstate using tuples is actually worth 15% cachesize by
 # avoiding duplication of the attribute names!
@@ -112,21 +112,22 @@  class SetCache(object):
 codecache = SetCache()
 
 class pythonCacheLine(object):
-    def __init__(self, refs, execs, contains):
+    def __init__(self, refs, execs, contains, extra):
         self.refs = codecache.internSet(refs)
         self.execs = codecache.internSet(execs)
         self.contains = {}
         for c in contains:
             self.contains[c] = codecache.internSet(contains[c])
+        self.extra = extra
 
     def __getstate__(self):
-        return (self.refs, self.execs, self.contains)
+        return (self.refs, self.execs, self.contains, self.extra)
 
     def __setstate__(self, state):
-        (refs, execs, contains) = state
-        self.__init__(refs, execs, contains)
+        (refs, execs, contains, extra) = state
+        self.__init__(refs, execs, contains, extra)
     def __hash__(self):
-        l = (hash(self.refs), hash(self.execs))
+        l = (hash(self.refs), hash(self.execs), hash(self.extra))
         for c in sorted(self.contains.keys()):
             l = l + (c, hash(self.contains[c]))
         return hash(l)
@@ -155,7 +156,7 @@  class CodeParserCache(MultiProcessCache):
     # so that an existing cache gets invalidated. Additionally you'll need
     # to increment __cache_version__ in cache.py in order to ensure that old
     # recipe caches don't trigger "Taskhash mismatch" errors.
-    CACHE_VERSION = 11
+    CACHE_VERSION = 12
 
     def __init__(self):
         MultiProcessCache.__init__(self)
@@ -169,8 +170,8 @@  class CodeParserCache(MultiProcessCache):
         self.pythoncachelines = {}
         self.shellcachelines = {}
 
-    def newPythonCacheLine(self, refs, execs, contains):
-        cacheline = pythonCacheLine(refs, execs, contains)
+    def newPythonCacheLine(self, refs, execs, contains, extra):
+        cacheline = pythonCacheLine(refs, execs, contains, extra)
         h = hash(cacheline)
         if h in self.pythoncachelines:
             return self.pythoncachelines[h]
@@ -338,6 +339,7 @@  class PythonParser():
             self.contains = {}
             for i in codeparsercache.pythoncache[h].contains:
                 self.contains[i] = set(codeparsercache.pythoncache[h].contains[i])
+            self.extra = codeparsercache.pythoncache[h].extra
             return
 
         if h in codeparsercache.pythoncacheextras:
@@ -346,6 +348,7 @@  class PythonParser():
             self.contains = {}
             for i in codeparsercache.pythoncacheextras[h].contains:
                 self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i])
+            self.extra = codeparsercache.pythoncacheextras[h].extra
             return
 
         if fixedhash and not node:
@@ -364,8 +367,11 @@  class PythonParser():
                 self.visit_Call(n)
 
         self.execs.update(self.var_execs)
+        self.extra = None
+        if fixedhash:
+            self.extra = bbhash(str(node))
 
-        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
+        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains, self.extra)
 
 class ShellParser():
     def __init__(self, name, log):
diff --git a/lib/bb/data.py b/lib/bb/data.py
index 505f42950..f672a8445 100644
--- a/lib/bb/data.py
+++ b/lib/bb/data.py
@@ -293,7 +293,7 @@  def build_dependencies(key, keys, mod_funcs, shelldeps, varflagsexcl, ignored_va
         if key in mod_funcs:
             exclusions = set()
             moddep = bb.codeparser.modulecode_deps[key]
-            value = handle_contains("", moddep[3], exclusions, d)
+            value = handle_contains(moddep[4], moddep[3], exclusions, d)
             return frozenset((moddep[0] | keys & moddep[1]) - ignored_vars), value
 
         if key[-1] == ']':