diff mbox series

[5/6] data: Add support for new BB_HASH_CODEPARSER_VALS for cache optimisation

Message ID 20230121212305.2171310-5-richard.purdie@linuxfoundation.org
State New
Headers show
Series [1/6] server/process: Fix lockfile contents check bug | expand

Commit Message

Richard Purdie Jan. 21, 2023, 9:23 p.m. UTC
Currently the codeparser cache ends up being extended for every parse run
since there are values in the functions such as the result of os.getpid()
from LOGFIFO in OE-Core.

Digging into that issue, there are also lots of similar but different
functions being parsed where the change might just be a path to WORKDIR,
a change in PN or PV or something like DATE/TIME.

There is no reason we have to use these changing values when computing the
dependencies of the functions. Even with a small tweak like:

BB_HASH_CODEPARSER_VALS = "LOGFIFO=/ T=/ WORKDIR=/ DATE=1234 TIME=1234 PV=0.0-1 PN=nopn"

the cache is reduced from ~4.6MB (and growing by ~300kB with every parse run)
to around 1.3MB, and it then remains static for oe-core and meta-oe. In my local
build, admittedly heavily experimented with, the cache had grown to 120MB.

The benefits of doing this are:

 * faster load time for bitbake since the cache is smaller to read from disk
   and load into memory
 * being able to skip saving the cache upon shutdown
 * lower memory footprint for bitbake
 * faster codeparser data lookups (since there is less data to search)

We only use these special values when passing code fragments to the codeparser
to parse so the real variable values should otherwise be used in the hash data.

The overall effect of this change, combined with others to avoid saving unchanged
cache files, can be a saving of ~2s on a ~16s parse on my local system, and it results
in a more responsive feeling bitbake. It also allows parsing performance to be
investigated more consistently.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
 lib/bb/data.py             | 15 ++++++++++-----
 lib/bb/tests/codeparser.py | 14 +++++++-------
 2 files changed, 17 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/lib/bb/data.py b/lib/bb/data.py
index 841369699e..f3ae062022 100644
--- a/lib/bb/data.py
+++ b/lib/bb/data.py
@@ -261,7 +261,7 @@  def emit_func_python(func, o=sys.__stdout__, d = init()):
                newdeps |= set((d.getVarFlag(dep, "vardeps") or "").split())
         newdeps -= seen
 
-def build_dependencies(key, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d):
+def build_dependencies(key, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d, codeparsedata):
     def handle_contains(value, contains, exclusions, d):
         newvalue = []
         if value:
@@ -312,14 +312,14 @@  def build_dependencies(key, keys, mod_funcs, shelldeps, varflagsexcl, ignored_va
             value = varflags.get("vardepvalue")
         elif varflags.get("func"):
             if varflags.get("python"):
-                value = d.getVarFlag(key, "_content", False)
+                value = codeparsedata.getVarFlag(key, "_content", False)
                 parser = bb.codeparser.PythonParser(key, logger)
                 parser.parse_python(value, filename=varflags.get("filename"), lineno=varflags.get("lineno"))
                 deps = deps | parser.references
                 deps = deps | (keys & parser.execs)
                 value = handle_contains(value, parser.contains, exclusions, d)
             else:
-                value, parsedvar = d.getVarFlag(key, "_content", False, retparser=True)
+                value, parsedvar = codeparsedata.getVarFlag(key, "_content", False, retparser=True)
                 parser = bb.codeparser.ShellParser(key, logger)
                 parser.parse_shell(parsedvar.value)
                 deps = deps | shelldeps
@@ -378,12 +378,17 @@  def generate_dependencies(d, ignored_vars):
     shelldeps = set(key for key in d.getVar("__exportlist", False) if d.getVarFlag(key, "export", False) and not d.getVarFlag(key, "unexport", False))
     varflagsexcl = d.getVar('BB_SIGNATURE_EXCLUDE_FLAGS')
 
+    codeparserd = d.createCopy()
+    for forced in (d.getVar('BB_HASH_CODEPARSER_VALS') or "").split():
+        key, value = forced.split("=", 1)
+        codeparserd.setVar(key, value)
+
     deps = {}
     values = {}
 
     tasklist = d.getVar('__BBTASKS', False) or []
     for task in tasklist:
-        deps[task], values[task] = build_dependencies(task, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d)
+        deps[task], values[task] = build_dependencies(task, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d, codeparserd)
         newdeps = deps[task]
         seen = set()
         while newdeps:
@@ -392,7 +397,7 @@  def generate_dependencies(d, ignored_vars):
             newdeps = set()
             for dep in nextdeps:
                 if dep not in deps:
-                    deps[dep], values[dep] = build_dependencies(dep, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d)
+                    deps[dep], values[dep] = build_dependencies(dep, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d, codeparserd)
                 newdeps |=  deps[dep]
             newdeps -= seen
         #print "For %s: %s" % (task, str(deps[task]))
diff --git a/lib/bb/tests/codeparser.py b/lib/bb/tests/codeparser.py
index a508f23bcb..7f5d59ca74 100644
--- a/lib/bb/tests/codeparser.py
+++ b/lib/bb/tests/codeparser.py
@@ -318,7 +318,7 @@  d.getVar(a(), False)
             "filename": "example.bb",
         })
 
-        deps, values = bb.data.build_dependencies("FOO", set(self.d.keys()), set(), set(), set(), set(), self.d)
+        deps, values = bb.data.build_dependencies("FOO", set(self.d.keys()), set(), set(), set(), set(), self.d, self.d)
 
         self.assertEqual(deps, set(["somevar", "bar", "something", "inexpand", "test", "test2", "a"]))
 
@@ -365,7 +365,7 @@  esac
         self.d.setVarFlags("FOO", {"func": True})
         self.setEmptyVars(execs)
 
-        deps, values = bb.data.build_dependencies("FOO", set(self.d.keys()), set(), set(), set(), set(), self.d)
+        deps, values = bb.data.build_dependencies("FOO", set(self.d.keys()), set(), set(), set(), set(), self.d, self.d)
 
         self.assertEqual(deps, set(["somevar", "inverted"] + execs))
 
@@ -375,7 +375,7 @@  esac
         self.d.setVar("FOO", "foo=oe_libinstall; eval $foo")
         self.d.setVarFlag("FOO", "vardeps", "oe_libinstall")
 
-        deps, values = bb.data.build_dependencies("FOO", set(self.d.keys()), set(), set(), set(), set(), self.d)
+        deps, values = bb.data.build_dependencies("FOO", set(self.d.keys()), set(), set(), set(), set(), self.d, self.d)
 
         self.assertEqual(deps, set(["oe_libinstall"]))
 
@@ -384,7 +384,7 @@  esac
         self.d.setVar("FOO", "foo=oe_libinstall; eval $foo")
         self.d.setVarFlag("FOO", "vardeps", "${@'oe_libinstall'}")
 
-        deps, values = bb.data.build_dependencies("FOO", set(self.d.keys()), set(), set(), set(), set(), self.d)
+        deps, values = bb.data.build_dependencies("FOO", set(self.d.keys()), set(), set(), set(), set(), self.d, self.d)
 
         self.assertEqual(deps, set(["oe_libinstall"]))
 
@@ -399,7 +399,7 @@  esac
         # Check dependencies
         self.d.setVar('ANOTHERVAR', expr)
         self.d.setVar('TESTVAR', 'anothervalue testval testval2')
-        deps, values = bb.data.build_dependencies("ANOTHERVAR", set(self.d.keys()), set(), set(), set(), set(), self.d)
+        deps, values = bb.data.build_dependencies("ANOTHERVAR", set(self.d.keys()), set(), set(), set(), set(), self.d, self.d)
         self.assertEqual(sorted(values.splitlines()),
                          sorted([expr,
                           'TESTVAR{anothervalue} = Set',
@@ -418,14 +418,14 @@  esac
         self.d.setVar('ANOTHERVAR', varval)
         self.d.setVar('TESTVAR', 'anothervalue testval testval2')
         self.d.setVar('TESTVAR2', 'testval3')
-        deps, values = bb.data.build_dependencies("ANOTHERVAR", set(self.d.keys()), set(), set(), set(), set(["TESTVAR"]), self.d)
+        deps, values = bb.data.build_dependencies("ANOTHERVAR", set(self.d.keys()), set(), set(), set(), set(["TESTVAR"]), self.d, self.d)
         self.assertEqual(sorted(values.splitlines()), sorted([varval]))
         self.assertEqual(deps, set(["TESTVAR2"]))
         self.assertEqual(self.d.getVar('ANOTHERVAR').split(), ['testval3', 'anothervalue'])
 
         # Check the vardepsexclude flag is handled by contains functionality
         self.d.setVarFlag('ANOTHERVAR', 'vardepsexclude', 'TESTVAR')
-        deps, values = bb.data.build_dependencies("ANOTHERVAR", set(self.d.keys()), set(), set(), set(), set(), self.d)
+        deps, values = bb.data.build_dependencies("ANOTHERVAR", set(self.d.keys()), set(), set(), set(), set(), self.d, self.d)
         self.assertEqual(sorted(values.splitlines()), sorted([varval]))
         self.assertEqual(deps, set(["TESTVAR2"]))
         self.assertEqual(self.d.getVar('ANOTHERVAR').split(), ['testval3', 'anothervalue'])