diff mbox series

data/siggen: Switch to use frozensets and optimize

Message ID 20221123001403.3696092-1-richard.purdie@linuxfoundation.org
State New
Headers show
Series data/siggen: Switch to use frozensets and optimize | expand

Commit Message

Richard Purdie Nov. 23, 2022, 12:14 a.m. UTC
Python handles frozensets a little more optimally than normal sets. Once we
finish parsing, we don't edit this data so we can convert to them.

To do that, we need to stop changing them, so process ignored_vars earlier;
then we can freeze the data and keep it frozen.

This has the side effect that we need to be careful to sort the data
in some of the variables when calculating the hashes.

Overall this does seem to show a decent parsing time speed improvement
of 20-25% in a local test but this would be highly setup dependent.

Also ensure the sigdata can handle exported frozensets and import them back
as frozensets instead of sets.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
 lib/bb/data.py   | 14 ++++++--------
 lib/bb/siggen.py |  6 +++---
 2 files changed, 9 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/lib/bb/data.py b/lib/bb/data.py
index 3a6af325f4..430d444203 100644
--- a/lib/bb/data.py
+++ b/lib/bb/data.py
@@ -276,7 +276,8 @@  def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d):
             value, parser = d.getVarFlag(vf[0], vf[1], False, retparser=True)
             deps |= parser.references
             deps = deps | (keys & parser.execs)
-            return deps, value
+            deps -= ignored_vars
+            return frozenset(deps), value
         varflags = d.getVarFlags(key, ["vardeps", "vardepvalue", "vardepsexclude", "exports", "postfuncs", "prefuncs", "lineno", "filename"]) or {}
         vardeps = varflags.get("vardeps")
         exclusions = varflags.get("vardepsexclude", "").split()
@@ -359,12 +360,13 @@  def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d):
 
         deps |= set((vardeps or "").split())
         deps -= set(exclusions)
+        deps -= ignored_vars
     except bb.parse.SkipRecipe:
         raise
     except Exception as e:
         bb.warn("Exception during build_dependencies for %s" % key)
         raise
-    return deps, value
+    return frozenset(deps), value
     #bb.note("Variable %s references %s and calls %s" % (key, str(deps), str(execs)))
     #d.setVarFlag(key, "vardeps", deps)
 
@@ -383,7 +385,7 @@  def generate_dependencies(d, ignored_vars):
         newdeps = deps[task]
         seen = set()
         while newdeps:
-            nextdeps = newdeps - ignored_vars
+            nextdeps = newdeps
             seen |= nextdeps
             newdeps = set()
             for dep in nextdeps:
@@ -407,7 +409,6 @@  def generate_dependency_hash(tasklist, gendeps, lookupcache, ignored_vars, fn):
         else:
             data = [data]
 
-        gendeps[task] -= ignored_vars
         newdeps = gendeps[task]
         seen = set()
         while newdeps:
@@ -415,9 +416,6 @@  def generate_dependency_hash(tasklist, gendeps, lookupcache, ignored_vars, fn):
             seen |= nextdeps
             newdeps = set()
             for dep in nextdeps:
-                if dep in ignored_vars:
-                    continue
-                gendeps[dep] -= ignored_vars
                 newdeps |= gendeps[dep]
             newdeps -= seen
 
@@ -429,7 +427,7 @@  def generate_dependency_hash(tasklist, gendeps, lookupcache, ignored_vars, fn):
                 data.append(str(var))
         k = fn + ":" + task
         basehash[k] = hashlib.sha256("".join(data).encode("utf-8")).hexdigest()
-        taskdeps[task] = alldeps
+        taskdeps[task] = frozenset(seen)
 
     return taskdeps, basehash
 
diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py
index 07bb529452..72b906c153 100644
--- a/lib/bb/siggen.py
+++ b/lib/bb/siggen.py
@@ -25,13 +25,13 @@  hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
 
 class SetEncoder(json.JSONEncoder):
     def default(self, obj):
-        if isinstance(obj, set):
+        if isinstance(obj, set) or isinstance(obj, frozenset):
             return dict(_set_object=list(sorted(obj)))
         return json.JSONEncoder.default(self, obj)
 
 def SetDecoder(dct):
     if '_set_object' in dct:
-        return set(dct['_set_object'])
+        return frozenset(dct['_set_object'])
     return dct
 
 def init(d):
@@ -1056,7 +1056,7 @@  def calc_basehash(sigdata):
         basedata = ''
 
     alldeps = sigdata['taskdeps']
-    for dep in alldeps:
+    for dep in sorted(alldeps):
         basedata = basedata + dep
         val = sigdata['varvals'][dep]
         if val is not None: