Message ID | 20221127213610.539976-2-richard.purdie@linuxfoundation.org |
---|---|
State | New |
Headers | show |
Series | [RFC,1/2] parse: Add support for addpylib conf file directive | expand |
On Sun, Nov 27, 2022 at 3:36 PM Richard Purdie <richard.purdie@linuxfoundation.org> wrote: > > Moving code into python modules is a very effective way to reduce parsing > time and overhead in recipes. The downside has always been that any > dependency information on which variables those functions access is lost > and the hashes can therefore become less reliable. > > This patch adds parsing of the imported module functions and that dependency > information is them injected back into the hash dependency information. > > Intermodule function references are resolved to the full function > call names in our module namespace to ensure interfunction dependencies > are correctly handled too. > > Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org> > --- > lib/bb/codeparser.py | 2 ++ > lib/bb/data.py | 40 +++++++++++++++++++++++----------------- > lib/bb/parse/ast.py | 40 +++++++++++++++++++++++++++++++++++++++- > 3 files changed, 64 insertions(+), 18 deletions(-) > > diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py > index 9d66d3ae41..e7d7ebe048 100644 > --- a/lib/bb/codeparser.py > +++ b/lib/bb/codeparser.py > @@ -36,6 +36,8 @@ from bb.cache import MultiProcessCache > > logger = logging.getLogger('BitBake.CodeParser') > > +modulecode_deps = {} > + > def bbhash(s): > return hashlib.sha256(s.encode("utf-8")).hexdigest() > > diff --git a/lib/bb/data.py b/lib/bb/data.py > index 22d314d6c2..ab1a5959ba 100644 > --- a/lib/bb/data.py > +++ b/lib/bb/data.py > @@ -261,22 +261,9 @@ def emit_func_python(func, o=sys.__stdout__, d = init()): > newdeps |= set((d.getVarFlag(dep, "vardeps") or "").split()) > newdeps -= seen > > -def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d): > +def build_dependencies(key, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d): > deps = set() > try: > - if key[-1] == ']': > - vf = key[:-1].split('[') > - if vf[1] == "vardepvalueexclude": > - return deps, "" > - value, parser = d.getVarFlag(vf[0], vf[1], False, retparser=True) > - deps |= parser.references > - deps = deps | (keys & parser.execs) > - deps -= ignored_vars > - return frozenset(deps), value > - varflags = d.getVarFlags(key, ["vardeps", "vardepvalue", "vardepsexclude", "exports", "postfuncs", "prefuncs", "lineno", "filename"]) or {} > - vardeps = varflags.get("vardeps") > - exclusions = varflags.get("vardepsexclude", "").split() > - > def handle_contains(value, contains, exclusions, d): > newvalue = [] > if value: > @@ -294,6 +281,24 @@ def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d): > newvalue.append("\n%s{%s} = Set" % (k, item)) > return "".join(newvalue) > > + if key in mod_funcs: > + exclusions = set() > + moddep = bb.codeparser.modulecode_deps[key] > + value = handle_contains("", moddep[3], exclusions, d) > + return frozenset(moddep[0] - ignored_vars), value > + if key[-1] == ']': > + vf = key[:-1].split('[') > + if vf[1] == "vardepvalueexclude": > + return deps, "" > + value, parser = d.getVarFlag(vf[0], vf[1], False, retparser=True) > + deps |= parser.references > + deps = deps | (keys & parser.execs) > + deps -= ignored_vars > + return frozenset(deps), value > + varflags = d.getVarFlags(key, ["vardeps", "vardepvalue", "vardepsexclude", "exports", "postfuncs", "prefuncs", "lineno", "filename"]) or {} > + vardeps = varflags.get("vardeps") > + exclusions = varflags.get("vardepsexclude", "").split() > + > def handle_remove(value, deps, removes, d): > for r in sorted(removes): > r2 = d.expandWithRefs(r, None) > @@ -367,7 +372,8 @@ def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d): > > def generate_dependencies(d, ignored_vars): > > - keys = set(key for key in d if not key.startswith("__")) > + mod_funcs = set(bb.codeparser.modulecode_deps.keys()) > + keys = set(key for key in d if not key.startswith("__")) | mod_funcs > shelldeps = set(key for key in d.getVar("__exportlist", False) if d.getVarFlag(key, "export", False) and not d.getVarFlag(key, "unexport", False)) > varflagsexcl = d.getVar('BB_SIGNATURE_EXCLUDE_FLAGS') > > @@ -376,7 +382,7 @@ def generate_dependencies(d, ignored_vars): > > tasklist = d.getVar('__BBTASKS', False) or [] > for task in tasklist: > - deps[task], values[task] = build_dependencies(task, keys, shelldeps, varflagsexcl, ignored_vars, d) > + deps[task], values[task] = build_dependencies(task, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d) > newdeps = deps[task] > seen = set() > while newdeps: > @@ -385,7 +391,7 @@ def generate_dependencies(d, ignored_vars): > newdeps = set() > for dep in nextdeps: > if dep not in deps: > - deps[dep], values[dep] = build_dependencies(dep, keys, shelldeps, varflagsexcl, ignored_vars, d) > + deps[dep], values[dep] = build_dependencies(dep, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d) > newdeps |= deps[dep] > newdeps -= seen > #print "For %s: %s" % (task, str(deps[task])) > diff --git a/lib/bb/parse/ast.py b/lib/bb/parse/ast.py > index 7bfee9ed67..7dbb1adfa8 100644 > --- a/lib/bb/parse/ast.py > +++ b/lib/bb/parse/ast.py > @@ -10,6 +10,7 @@ > # > > import sys > +import inspect > import bb > from bb import methodpool > from bb.parse import logger > @@ -284,7 +285,44 @@ class PyLibNode(AstNode): > bb.utils._context[self.namespace] = __import__(self.namespace) > toimport = getattr(bb.utils._context[self.namespace], "BBIMPORTS", []) > for i in toimport: > - bb.utils._context[i.split(".", 1)[0]] = __import__(i) > + base = i.split(".", 1)[0] > + bb.utils._context[base] = __import__(i) > + if "." in i: > + mod = getattr(bb.utils._context[base], i.split(".", 1)[1]) > + funcs = dir(mod) > + for f in funcs: > + if f.startswith("_"): > + continue > + fcall = getattr(mod, f) > + if not callable(fcall): > + continue > + src = inspect.getsource(fcall) While the text of a module should always be present, making python go back to it is probably inefficient when it's already gone through the effort of converting it to byte code (I beleive it has to go find the corresponding .py file for the .pyc file it already loaded into memory). Perhaps we can split out the part of PythonParser that deals with the compiled AST to a new function and call that with inspect? Maybe something like: bytecode = inspect.getmembers_static(fcall, "__code__")[0][1] parser.parse_python_ast(bytecode) > + name = "%s.%s" % (i, f) > + parser = bb.codeparser.PythonParser(name, logger) > + parser.parse_python(src, filename=inspect.getfile(fcall), lineno=1) > + execs = parser.execs > + # Expand internal module exec references > + for e in parser.execs: > + if e in funcs: > + execs.remove(e) You are modifying parser.execs in place which will ruin your iteration. I think you probably want: execs = {f"{i}.{e}" if e in funcs else e for e in parser.execs} Or if you don't want list comprehension: execs = [] for e in parse.execs: if e in funcs: execs.add(f"{i}.{e}") else: execs.add(e) > + execs.add(i + "." + e) > + bb.codeparser.modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy()] > + #bb.warn("%s: %s\nRefs:%s Execs: %s %s %s" % (name, src, parser.references, parser.execs, parser.var_execs, parser.contains)) > + > + # Resolve intermodule references now, stack dependencies > + found = True > + while found: > + found = False > + funcs = set(bb.codeparser.modulecode_deps.keys()) > + for entry in bb.codeparser.modulecode_deps: > + for dup in (bb.codeparser.modulecode_deps[entry][1] & funcs): > + found = True > + bb.codeparser.modulecode_deps[entry][1].remove(dup) > + bb.codeparser.modulecode_deps[entry][0] |= bb.codeparser.modulecode_deps[dup][0] > + bb.codeparser.modulecode_deps[entry][1] |= bb.codeparser.modulecode_deps[dup][1] > + bb.codeparser.modulecode_deps[entry][2] |= bb.codeparser.modulecode_deps[dup][2] > + bb.codeparser.modulecode_deps[entry][3] |= bb.codeparser.modulecode_deps[dup][3] > + > except AttributeError as e: > bb.error("Error importing OE modules: %s" % str(e)) > > -- > 2.34.1 > > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this group. > View/Reply Online (#14116): https://lists.openembedded.org/g/bitbake-devel/message/14116 > Mute This Topic: https://lists.openembedded.org/mt/95297764/3616693 > Group Owner: bitbake-devel+owner@lists.openembedded.org > Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [JPEWhacker@gmail.com] > -=-=-=-=-=-=-=-=-=-=-=- >
On Mon, 2022-11-28 at 08:32 -0600, Joshua Watt wrote: > On Sun, Nov 27, 2022 at 3:36 PM Richard Purdie > <richard.purdie@linuxfoundation.org> wrote: > > --- a/lib/bb/parse/ast.py > > +++ b/lib/bb/parse/ast.py > > @@ -10,6 +10,7 @@ > > # > > > > import sys > > +import inspect > > import bb > > from bb import methodpool > > from bb.parse import logger > > @@ -284,7 +285,44 @@ class PyLibNode(AstNode): > > bb.utils._context[self.namespace] = __import__(self.namespace) > > toimport = getattr(bb.utils._context[self.namespace], "BBIMPORTS", []) > > for i in toimport: > > - bb.utils._context[i.split(".", 1)[0]] = __import__(i) > > + base = i.split(".", 1)[0] > > + bb.utils._context[base] = __import__(i) > > + if "." in i: > > + mod = getattr(bb.utils._context[base], i.split(".", 1)[1]) > > + funcs = dir(mod) > > + for f in funcs: > > + if f.startswith("_"): > > + continue > > + fcall = getattr(mod, f) > > + if not callable(fcall): > > + continue > > + src = inspect.getsource(fcall) > > While the text of a module should always be present, making python go > back to it is probably inefficient when it's already gone through the > effort of converting it to byte code (I beleive it has to go find the > corresponding .py file for the .pyc file it already loaded into > memory). Perhaps we can split out the part of PythonParser that deals > with the compiled AST to a new function and call that with inspect? > > Maybe something like: > > bytecode = inspect.getmembers_static(fcall, "__code__")[0][1] > parser.parse_python_ast(bytecode) Sadly we need the text since PythonParser has it's own optimisations and hash lookups based upon that :/. I agree this is probably not as efficient as it should be though. We also still don't currently get the lineno information we need either. > > > + name = "%s.%s" % (i, f) > > + parser = bb.codeparser.PythonParser(name, logger) > > + parser.parse_python(src, filename=inspect.getfile(fcall), lineno=1) > > + execs = parser.execs > > + # Expand internal module exec references > > + for e in parser.execs: > > + if e in funcs: > > + execs.remove(e) > > You are modifying parser.execs in place which will ruin your > iteration. Well spotted. It was meant to be execs = parser.execs.copy() (see the other copy()'s of the other bits later. > I think you probably want: > > execs = {f"{i}.{e}" if e in funcs else e for e in parser.execs} > > Or if you don't want list comprehension: > > execs = [] > for e in parse.execs: > if e in funcs: > execs.add(f"{i}.{e}") > else: > execs.add(e) Cheers, Richard
diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py index 9d66d3ae41..e7d7ebe048 100644 --- a/lib/bb/codeparser.py +++ b/lib/bb/codeparser.py @@ -36,6 +36,8 @@ from bb.cache import MultiProcessCache logger = logging.getLogger('BitBake.CodeParser') +modulecode_deps = {} + def bbhash(s): return hashlib.sha256(s.encode("utf-8")).hexdigest() diff --git a/lib/bb/data.py b/lib/bb/data.py index 22d314d6c2..ab1a5959ba 100644 --- a/lib/bb/data.py +++ b/lib/bb/data.py @@ -261,22 +261,9 @@ def emit_func_python(func, o=sys.__stdout__, d = init()): newdeps |= set((d.getVarFlag(dep, "vardeps") or "").split()) newdeps -= seen -def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d): +def build_dependencies(key, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d): deps = set() try: - if key[-1] == ']': - vf = key[:-1].split('[') - if vf[1] == "vardepvalueexclude": - return deps, "" - value, parser = d.getVarFlag(vf[0], vf[1], False, retparser=True) - deps |= parser.references - deps = deps | (keys & parser.execs) - deps -= ignored_vars - return frozenset(deps), value - varflags = d.getVarFlags(key, ["vardeps", "vardepvalue", "vardepsexclude", "exports", "postfuncs", "prefuncs", "lineno", "filename"]) or {} - vardeps = varflags.get("vardeps") - exclusions = varflags.get("vardepsexclude", "").split() - def handle_contains(value, contains, exclusions, d): newvalue = [] if value: @@ -294,6 +281,24 @@ def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d): newvalue.append("\n%s{%s} = Set" % (k, item)) return "".join(newvalue) + if key in mod_funcs: + exclusions = set() + moddep = bb.codeparser.modulecode_deps[key] + value = handle_contains("", moddep[3], exclusions, d) + return frozenset(moddep[0] - ignored_vars), value + if key[-1] == ']': + vf = key[:-1].split('[') + if vf[1] == "vardepvalueexclude": + return deps, "" + value, parser = d.getVarFlag(vf[0], vf[1], False, retparser=True) + deps |= parser.references + deps = deps | (keys & parser.execs) + deps -= ignored_vars + return frozenset(deps), value + varflags = d.getVarFlags(key, ["vardeps", "vardepvalue", "vardepsexclude", "exports", "postfuncs", "prefuncs", "lineno", "filename"]) or {} + vardeps = varflags.get("vardeps") + exclusions = varflags.get("vardepsexclude", "").split() + def handle_remove(value, deps, removes, d): for r in sorted(removes): r2 = d.expandWithRefs(r, None) @@ -367,7 +372,8 @@ def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d): def generate_dependencies(d, ignored_vars): - keys = set(key for key in d if not key.startswith("__")) + mod_funcs = set(bb.codeparser.modulecode_deps.keys()) + keys = set(key for key in d if not key.startswith("__")) | mod_funcs shelldeps = set(key for key in d.getVar("__exportlist", False) if d.getVarFlag(key, "export", False) and not d.getVarFlag(key, "unexport", False)) varflagsexcl = d.getVar('BB_SIGNATURE_EXCLUDE_FLAGS') @@ -376,7 +382,7 @@ def generate_dependencies(d, ignored_vars): tasklist = d.getVar('__BBTASKS', False) or [] for task in tasklist: - deps[task], values[task] = build_dependencies(task, keys, shelldeps, varflagsexcl, ignored_vars, d) + deps[task], values[task] = build_dependencies(task, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d) newdeps = deps[task] seen = set() while newdeps: @@ -385,7 +391,7 @@ def generate_dependencies(d, ignored_vars): newdeps = set() for dep in nextdeps: if dep not in deps: - deps[dep], values[dep] = build_dependencies(dep, keys, shelldeps, varflagsexcl, ignored_vars, d) + deps[dep], values[dep] = build_dependencies(dep, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d) newdeps |= deps[dep] newdeps -= seen #print "For %s: %s" % (task, str(deps[task])) diff --git a/lib/bb/parse/ast.py b/lib/bb/parse/ast.py index 7bfee9ed67..7dbb1adfa8 100644 --- a/lib/bb/parse/ast.py +++ b/lib/bb/parse/ast.py @@ -10,6 +10,7 @@ # import sys +import inspect import bb from bb import methodpool from bb.parse import logger @@ -284,7 +285,44 @@ class PyLibNode(AstNode): bb.utils._context[self.namespace] = __import__(self.namespace) toimport = getattr(bb.utils._context[self.namespace], "BBIMPORTS", []) for i in toimport: - bb.utils._context[i.split(".", 1)[0]] = __import__(i) + base = i.split(".", 1)[0] + bb.utils._context[base] = __import__(i) + if "." in i: + mod = getattr(bb.utils._context[base], i.split(".", 1)[1]) + funcs = dir(mod) + for f in funcs: + if f.startswith("_"): + continue + fcall = getattr(mod, f) + if not callable(fcall): + continue + src = inspect.getsource(fcall) + name = "%s.%s" % (i, f) + parser = bb.codeparser.PythonParser(name, logger) + parser.parse_python(src, filename=inspect.getfile(fcall), lineno=1) + execs = parser.execs + # Expand internal module exec references + for e in parser.execs: + if e in funcs: + execs.remove(e) + execs.add(i + "." + e) + bb.codeparser.modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy()] + #bb.warn("%s: %s\nRefs:%s Execs: %s %s %s" % (name, src, parser.references, parser.execs, parser.var_execs, parser.contains)) + + # Resolve intermodule references now, stack dependencies + found = True + while found: + found = False + funcs = set(bb.codeparser.modulecode_deps.keys()) + for entry in bb.codeparser.modulecode_deps: + for dup in (bb.codeparser.modulecode_deps[entry][1] & funcs): + found = True + bb.codeparser.modulecode_deps[entry][1].remove(dup) + bb.codeparser.modulecode_deps[entry][0] |= bb.codeparser.modulecode_deps[dup][0] + bb.codeparser.modulecode_deps[entry][1] |= bb.codeparser.modulecode_deps[dup][1] + bb.codeparser.modulecode_deps[entry][2] |= bb.codeparser.modulecode_deps[dup][2] + bb.codeparser.modulecode_deps[entry][3] |= bb.codeparser.modulecode_deps[dup][3] + except AttributeError as e: bb.error("Error importing OE modules: %s" % str(e))
Moving code into python modules is a very effective way to reduce parsing time and overhead in recipes. The downside has always been that any dependency information on which variables those functions access is lost and the hashes can therefore become less reliable. This patch adds parsing of the imported module functions and that dependency information is them injected back into the hash dependency information. Intermodule function references are resolved to the full function call names in our module namespace to ensure interfunction dependencies are correctly handled too. Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org> --- lib/bb/codeparser.py | 2 ++ lib/bb/data.py | 40 +++++++++++++++++++++++----------------- lib/bb/parse/ast.py | 40 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 18 deletions(-)