diff mbox series

[RFC,2/2] ast/data: Add dependencies from python module functions

Message ID 20221127213610.539976-2-richard.purdie@linuxfoundation.org
State New
Headers show
Series [RFC,1/2] parse: Add support for addpylib conf file directive | expand

Commit Message

Richard Purdie Nov. 27, 2022, 9:36 p.m. UTC
Moving code into python modules is a very effective way to reduce parsing
time and overhead in recipes. The downside has always been that any
dependency information on which variables those functions access is lost
and the hashes can therefore become less reliable.

This patch adds parsing of the imported module functions and that dependency
information is them injected back into the hash dependency information.

Intermodule function references are resolved to the full function
call names in our module namespace to ensure interfunction dependencies
are correctly handled too.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
 lib/bb/codeparser.py |  2 ++
 lib/bb/data.py       | 40 +++++++++++++++++++++++-----------------
 lib/bb/parse/ast.py  | 40 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 64 insertions(+), 18 deletions(-)

Comments

Joshua Watt Nov. 28, 2022, 2:32 p.m. UTC | #1
On Sun, Nov 27, 2022 at 3:36 PM Richard Purdie
<richard.purdie@linuxfoundation.org> wrote:
>
> Moving code into python modules is a very effective way to reduce parsing
> time and overhead in recipes. The downside has always been that any
> dependency information on which variables those functions access is lost
> and the hashes can therefore become less reliable.
>
> This patch adds parsing of the imported module functions and that dependency
> information is them injected back into the hash dependency information.
>
> Intermodule function references are resolved to the full function
> call names in our module namespace to ensure interfunction dependencies
> are correctly handled too.
>
> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
> ---
>  lib/bb/codeparser.py |  2 ++
>  lib/bb/data.py       | 40 +++++++++++++++++++++++-----------------
>  lib/bb/parse/ast.py  | 40 +++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 64 insertions(+), 18 deletions(-)
>
> diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py
> index 9d66d3ae41..e7d7ebe048 100644
> --- a/lib/bb/codeparser.py
> +++ b/lib/bb/codeparser.py
> @@ -36,6 +36,8 @@ from bb.cache import MultiProcessCache
>
>  logger = logging.getLogger('BitBake.CodeParser')
>
> +modulecode_deps = {}
> +
>  def bbhash(s):
>      return hashlib.sha256(s.encode("utf-8")).hexdigest()
>
> diff --git a/lib/bb/data.py b/lib/bb/data.py
> index 22d314d6c2..ab1a5959ba 100644
> --- a/lib/bb/data.py
> +++ b/lib/bb/data.py
> @@ -261,22 +261,9 @@ def emit_func_python(func, o=sys.__stdout__, d = init()):
>                 newdeps |= set((d.getVarFlag(dep, "vardeps") or "").split())
>          newdeps -= seen
>
> -def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d):
> +def build_dependencies(key, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d):
>      deps = set()
>      try:
> -        if key[-1] == ']':
> -            vf = key[:-1].split('[')
> -            if vf[1] == "vardepvalueexclude":
> -                return deps, ""
> -            value, parser = d.getVarFlag(vf[0], vf[1], False, retparser=True)
> -            deps |= parser.references
> -            deps = deps | (keys & parser.execs)
> -            deps -= ignored_vars
> -            return frozenset(deps), value
> -        varflags = d.getVarFlags(key, ["vardeps", "vardepvalue", "vardepsexclude", "exports", "postfuncs", "prefuncs", "lineno", "filename"]) or {}
> -        vardeps = varflags.get("vardeps")
> -        exclusions = varflags.get("vardepsexclude", "").split()
> -
>          def handle_contains(value, contains, exclusions, d):
>              newvalue = []
>              if value:
> @@ -294,6 +281,24 @@ def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d):
>                          newvalue.append("\n%s{%s} = Set" % (k, item))
>              return "".join(newvalue)
>
> +        if key in mod_funcs:
> +            exclusions = set()
> +            moddep = bb.codeparser.modulecode_deps[key]
> +            value = handle_contains("", moddep[3], exclusions, d)
> +            return frozenset(moddep[0] - ignored_vars), value
> +        if key[-1] == ']':
> +            vf = key[:-1].split('[')
> +            if vf[1] == "vardepvalueexclude":
> +                return deps, ""
> +            value, parser = d.getVarFlag(vf[0], vf[1], False, retparser=True)
> +            deps |= parser.references
> +            deps = deps | (keys & parser.execs)
> +            deps -= ignored_vars
> +            return frozenset(deps), value
> +        varflags = d.getVarFlags(key, ["vardeps", "vardepvalue", "vardepsexclude", "exports", "postfuncs", "prefuncs", "lineno", "filename"]) or {}
> +        vardeps = varflags.get("vardeps")
> +        exclusions = varflags.get("vardepsexclude", "").split()
> +
>          def handle_remove(value, deps, removes, d):
>              for r in sorted(removes):
>                  r2 = d.expandWithRefs(r, None)
> @@ -367,7 +372,8 @@ def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d):
>
>  def generate_dependencies(d, ignored_vars):
>
> -    keys = set(key for key in d if not key.startswith("__"))
> +    mod_funcs = set(bb.codeparser.modulecode_deps.keys())
> +    keys = set(key for key in d if not key.startswith("__")) | mod_funcs
>      shelldeps = set(key for key in d.getVar("__exportlist", False) if d.getVarFlag(key, "export", False) and not d.getVarFlag(key, "unexport", False))
>      varflagsexcl = d.getVar('BB_SIGNATURE_EXCLUDE_FLAGS')
>
> @@ -376,7 +382,7 @@ def generate_dependencies(d, ignored_vars):
>
>      tasklist = d.getVar('__BBTASKS', False) or []
>      for task in tasklist:
> -        deps[task], values[task] = build_dependencies(task, keys, shelldeps, varflagsexcl, ignored_vars, d)
> +        deps[task], values[task] = build_dependencies(task, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d)
>          newdeps = deps[task]
>          seen = set()
>          while newdeps:
> @@ -385,7 +391,7 @@ def generate_dependencies(d, ignored_vars):
>              newdeps = set()
>              for dep in nextdeps:
>                  if dep not in deps:
> -                    deps[dep], values[dep] = build_dependencies(dep, keys, shelldeps, varflagsexcl, ignored_vars, d)
> +                    deps[dep], values[dep] = build_dependencies(dep, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d)
>                  newdeps |=  deps[dep]
>              newdeps -= seen
>          #print "For %s: %s" % (task, str(deps[task]))
> diff --git a/lib/bb/parse/ast.py b/lib/bb/parse/ast.py
> index 7bfee9ed67..7dbb1adfa8 100644
> --- a/lib/bb/parse/ast.py
> +++ b/lib/bb/parse/ast.py
> @@ -10,6 +10,7 @@
>  #
>
>  import sys
> +import inspect
>  import bb
>  from bb import methodpool
>  from bb.parse import logger
> @@ -284,7 +285,44 @@ class PyLibNode(AstNode):
>              bb.utils._context[self.namespace] = __import__(self.namespace)
>              toimport = getattr(bb.utils._context[self.namespace], "BBIMPORTS", [])
>              for i in toimport:
> -                bb.utils._context[i.split(".", 1)[0]] = __import__(i)
> +                base = i.split(".", 1)[0]
> +                bb.utils._context[base] = __import__(i)
> +                if "." in i:
> +                    mod = getattr(bb.utils._context[base], i.split(".", 1)[1])
> +                    funcs = dir(mod)
> +                    for f in funcs:
> +                        if f.startswith("_"):
> +                            continue
> +                        fcall = getattr(mod, f)
> +                        if not callable(fcall):
> +                            continue
> +                        src = inspect.getsource(fcall)

While the text of a module should always be present, making python go
back to it is probably inefficient when it's already gone through the
effort of converting it to byte code (I beleive it has to go find the
corresponding .py file for the .pyc file it already loaded into
memory). Perhaps we can split out the part of PythonParser that deals
with the compiled AST to a new function and call that with inspect?

Maybe something like:

  bytecode = inspect.getmembers_static(fcall, "__code__")[0][1]
  parser.parse_python_ast(bytecode)

> +                        name = "%s.%s" % (i, f)
> +                        parser = bb.codeparser.PythonParser(name, logger)
> +                        parser.parse_python(src, filename=inspect.getfile(fcall), lineno=1)
> +                        execs = parser.execs
> +                        # Expand internal module exec references
> +                        for e in parser.execs:
> +                            if e in funcs:
> +                                execs.remove(e)

You are modifying parser.execs in place which will ruin your
iteration. I think you probably want:

 execs = {f"{i}.{e}" if e in funcs else e for e in parser.execs}

Or if you don't want list comprehension:

 execs = []
 for e in parse.execs:
   if e in funcs:
     execs.add(f"{i}.{e}")
  else:
     execs.add(e)

> +                                execs.add(i + "." + e)
> +                        bb.codeparser.modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy()]
> +                        #bb.warn("%s: %s\nRefs:%s Execs: %s %s %s" % (name, src, parser.references, parser.execs, parser.var_execs, parser.contains))
> +
> +            # Resolve intermodule references now, stack dependencies
> +            found = True
> +            while found:
> +                found = False
> +                funcs = set(bb.codeparser.modulecode_deps.keys())
> +                for entry in bb.codeparser.modulecode_deps:
> +                    for dup in (bb.codeparser.modulecode_deps[entry][1] & funcs):
> +                        found = True
> +                        bb.codeparser.modulecode_deps[entry][1].remove(dup)
> +                        bb.codeparser.modulecode_deps[entry][0] |= bb.codeparser.modulecode_deps[dup][0]
> +                        bb.codeparser.modulecode_deps[entry][1] |= bb.codeparser.modulecode_deps[dup][1]
> +                        bb.codeparser.modulecode_deps[entry][2] |= bb.codeparser.modulecode_deps[dup][2]
> +                        bb.codeparser.modulecode_deps[entry][3] |= bb.codeparser.modulecode_deps[dup][3]
> +
>          except AttributeError as e:
>              bb.error("Error importing OE modules: %s" % str(e))
>
> --
> 2.34.1
>
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#14116): https://lists.openembedded.org/g/bitbake-devel/message/14116
> Mute This Topic: https://lists.openembedded.org/mt/95297764/3616693
> Group Owner: bitbake-devel+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [JPEWhacker@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
Richard Purdie Nov. 28, 2022, 2:50 p.m. UTC | #2
On Mon, 2022-11-28 at 08:32 -0600, Joshua Watt wrote:
> On Sun, Nov 27, 2022 at 3:36 PM Richard Purdie
> <richard.purdie@linuxfoundation.org> wrote:
> > --- a/lib/bb/parse/ast.py
> > +++ b/lib/bb/parse/ast.py
> > @@ -10,6 +10,7 @@
> >  #
> > 
> >  import sys
> > +import inspect
> >  import bb
> >  from bb import methodpool
> >  from bb.parse import logger
> > @@ -284,7 +285,44 @@ class PyLibNode(AstNode):
> >              bb.utils._context[self.namespace] = __import__(self.namespace)
> >              toimport = getattr(bb.utils._context[self.namespace], "BBIMPORTS", [])
> >              for i in toimport:
> > -                bb.utils._context[i.split(".", 1)[0]] = __import__(i)
> > +                base = i.split(".", 1)[0]
> > +                bb.utils._context[base] = __import__(i)
> > +                if "." in i:
> > +                    mod = getattr(bb.utils._context[base], i.split(".", 1)[1])
> > +                    funcs = dir(mod)
> > +                    for f in funcs:
> > +                        if f.startswith("_"):
> > +                            continue
> > +                        fcall = getattr(mod, f)
> > +                        if not callable(fcall):
> > +                            continue
> > +                        src = inspect.getsource(fcall)
> 
> While the text of a module should always be present, making python go
> back to it is probably inefficient when it's already gone through the
> effort of converting it to byte code (I beleive it has to go find the
> corresponding .py file for the .pyc file it already loaded into
> memory). Perhaps we can split out the part of PythonParser that deals
> with the compiled AST to a new function and call that with inspect?
> 
> Maybe something like:
> 
>   bytecode = inspect.getmembers_static(fcall, "__code__")[0][1]
>   parser.parse_python_ast(bytecode)

Sadly we need the text since PythonParser has it's own optimisations
and hash lookups based upon that :/. I agree this is probably not as
efficient as it should be though. We also still don't currently get the
lineno information we need either.

> 
> > +                        name = "%s.%s" % (i, f)
> > +                        parser = bb.codeparser.PythonParser(name, logger)
> > +                        parser.parse_python(src, filename=inspect.getfile(fcall), lineno=1)
> > +                        execs = parser.execs
> > +                        # Expand internal module exec references
> > +                        for e in parser.execs:
> > +                            if e in funcs:
> > +                                execs.remove(e)
> 
> You are modifying parser.execs in place which will ruin your
> iteration.

Well spotted. It was meant to be execs = parser.execs.copy() (see the
other copy()'s of the other bits later.

>  I think you probably want:
> 
>  execs = {f"{i}.{e}" if e in funcs else e for e in parser.execs}
> 
> Or if you don't want list comprehension:
> 
>  execs = []
>  for e in parse.execs:
>    if e in funcs:
>      execs.add(f"{i}.{e}")
>   else:
>      execs.add(e)

Cheers,

Richard
diff mbox series

Patch

diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py
index 9d66d3ae41..e7d7ebe048 100644
--- a/lib/bb/codeparser.py
+++ b/lib/bb/codeparser.py
@@ -36,6 +36,8 @@  from bb.cache import MultiProcessCache
 
 logger = logging.getLogger('BitBake.CodeParser')
 
+modulecode_deps = {}
+
 def bbhash(s):
     return hashlib.sha256(s.encode("utf-8")).hexdigest()
 
diff --git a/lib/bb/data.py b/lib/bb/data.py
index 22d314d6c2..ab1a5959ba 100644
--- a/lib/bb/data.py
+++ b/lib/bb/data.py
@@ -261,22 +261,9 @@  def emit_func_python(func, o=sys.__stdout__, d = init()):
                newdeps |= set((d.getVarFlag(dep, "vardeps") or "").split())
         newdeps -= seen
 
-def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d):
+def build_dependencies(key, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d):
     deps = set()
     try:
-        if key[-1] == ']':
-            vf = key[:-1].split('[')
-            if vf[1] == "vardepvalueexclude":
-                return deps, ""
-            value, parser = d.getVarFlag(vf[0], vf[1], False, retparser=True)
-            deps |= parser.references
-            deps = deps | (keys & parser.execs)
-            deps -= ignored_vars
-            return frozenset(deps), value
-        varflags = d.getVarFlags(key, ["vardeps", "vardepvalue", "vardepsexclude", "exports", "postfuncs", "prefuncs", "lineno", "filename"]) or {}
-        vardeps = varflags.get("vardeps")
-        exclusions = varflags.get("vardepsexclude", "").split()
-
         def handle_contains(value, contains, exclusions, d):
             newvalue = []
             if value:
@@ -294,6 +281,24 @@  def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d):
                         newvalue.append("\n%s{%s} = Set" % (k, item))
             return "".join(newvalue)
 
+        if key in mod_funcs:
+            exclusions = set()
+            moddep = bb.codeparser.modulecode_deps[key]
+            value = handle_contains("", moddep[3], exclusions, d)
+            return frozenset(moddep[0] - ignored_vars), value
+        if key[-1] == ']':
+            vf = key[:-1].split('[')
+            if vf[1] == "vardepvalueexclude":
+                return deps, ""
+            value, parser = d.getVarFlag(vf[0], vf[1], False, retparser=True)
+            deps |= parser.references
+            deps = deps | (keys & parser.execs)
+            deps -= ignored_vars
+            return frozenset(deps), value
+        varflags = d.getVarFlags(key, ["vardeps", "vardepvalue", "vardepsexclude", "exports", "postfuncs", "prefuncs", "lineno", "filename"]) or {}
+        vardeps = varflags.get("vardeps")
+        exclusions = varflags.get("vardepsexclude", "").split()
+
         def handle_remove(value, deps, removes, d):
             for r in sorted(removes):
                 r2 = d.expandWithRefs(r, None)
@@ -367,7 +372,8 @@  def build_dependencies(key, keys, shelldeps, varflagsexcl, ignored_vars, d):
 
 def generate_dependencies(d, ignored_vars):
 
-    keys = set(key for key in d if not key.startswith("__"))
+    mod_funcs = set(bb.codeparser.modulecode_deps.keys())
+    keys = set(key for key in d if not key.startswith("__")) | mod_funcs
     shelldeps = set(key for key in d.getVar("__exportlist", False) if d.getVarFlag(key, "export", False) and not d.getVarFlag(key, "unexport", False))
     varflagsexcl = d.getVar('BB_SIGNATURE_EXCLUDE_FLAGS')
 
@@ -376,7 +382,7 @@  def generate_dependencies(d, ignored_vars):
 
     tasklist = d.getVar('__BBTASKS', False) or []
     for task in tasklist:
-        deps[task], values[task] = build_dependencies(task, keys, shelldeps, varflagsexcl, ignored_vars, d)
+        deps[task], values[task] = build_dependencies(task, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d)
         newdeps = deps[task]
         seen = set()
         while newdeps:
@@ -385,7 +391,7 @@  def generate_dependencies(d, ignored_vars):
             newdeps = set()
             for dep in nextdeps:
                 if dep not in deps:
-                    deps[dep], values[dep] = build_dependencies(dep, keys, shelldeps, varflagsexcl, ignored_vars, d)
+                    deps[dep], values[dep] = build_dependencies(dep, keys, mod_funcs, shelldeps, varflagsexcl, ignored_vars, d)
                 newdeps |=  deps[dep]
             newdeps -= seen
         #print "For %s: %s" % (task, str(deps[task]))
diff --git a/lib/bb/parse/ast.py b/lib/bb/parse/ast.py
index 7bfee9ed67..7dbb1adfa8 100644
--- a/lib/bb/parse/ast.py
+++ b/lib/bb/parse/ast.py
@@ -10,6 +10,7 @@ 
 #
 
 import sys
+import inspect
 import bb
 from bb import methodpool
 from bb.parse import logger
@@ -284,7 +285,44 @@  class PyLibNode(AstNode):
             bb.utils._context[self.namespace] = __import__(self.namespace)
             toimport = getattr(bb.utils._context[self.namespace], "BBIMPORTS", [])
             for i in toimport:
-                bb.utils._context[i.split(".", 1)[0]] = __import__(i)
+                base = i.split(".", 1)[0]
+                bb.utils._context[base] = __import__(i)
+                if "." in i:
+                    mod = getattr(bb.utils._context[base], i.split(".", 1)[1])
+                    funcs = dir(mod)
+                    for f in funcs:
+                        if f.startswith("_"):
+                            continue
+                        fcall = getattr(mod, f)
+                        if not callable(fcall):
+                            continue
+                        src = inspect.getsource(fcall)
+                        name = "%s.%s" % (i, f)
+                        parser = bb.codeparser.PythonParser(name, logger)
+                        parser.parse_python(src, filename=inspect.getfile(fcall), lineno=1)
+                        execs = parser.execs
+                        # Expand internal module exec references
+                        for e in parser.execs:
+                            if e in funcs:
+                                execs.remove(e)
+                                execs.add(i + "." + e)
+                        bb.codeparser.modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy()]
+                        #bb.warn("%s: %s\nRefs:%s Execs: %s %s %s" % (name, src, parser.references, parser.execs, parser.var_execs, parser.contains))
+
+            # Resolve intermodule references now, stack dependencies
+            found = True
+            while found:
+                found = False
+                funcs = set(bb.codeparser.modulecode_deps.keys())
+                for entry in bb.codeparser.modulecode_deps:
+                    for dup in (bb.codeparser.modulecode_deps[entry][1] & funcs):
+                        found = True
+                        bb.codeparser.modulecode_deps[entry][1].remove(dup)
+                        bb.codeparser.modulecode_deps[entry][0] |= bb.codeparser.modulecode_deps[dup][0]
+                        bb.codeparser.modulecode_deps[entry][1] |= bb.codeparser.modulecode_deps[dup][1]
+                        bb.codeparser.modulecode_deps[entry][2] |= bb.codeparser.modulecode_deps[dup][2]
+                        bb.codeparser.modulecode_deps[entry][3] |= bb.codeparser.modulecode_deps[dup][3]
+
         except AttributeError as e:
             bb.error("Error importing OE modules: %s" % str(e))