diff mbox series

codeparser: Allow code visitor expressions to be declared in metadata

Message ID 20240828120646.1458872-1-richard.purdie@linuxfoundation.org
State New
Headers show
Series codeparser: Allow code visitor expressions to be declared in metadata | expand

Commit Message

Richard Purdie Aug. 28, 2024, 12:06 p.m. UTC
Allow the metadata to define code visitor expressions which mean that
custom dependencies can be handled in function libraries.

An example is the qa.handle_error function in OE which can set something
like:

handle_error.visitorcode = """
if isinstance(args[0], ast.Constant) and isinstance(args[0].value, str):
    for i in ["ERROR_QA", "WARN_QA"]:
        if i not in self.contains:
            self.contains[i] = set()
        self.contains[i].add(args[0].value)
else:
    self.warn(node.func, args[0])
    self.execs.add(name)
"""

Meaning that it can have contains optimisations on ERROR_QA and WARN_QA
instead of hard dependencies.

One drawback to this solution is the parsing order. Functions with
visitorcode need to be defined before anything else references them
or the visitor code will not function for the earlier references.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
 lib/bb/cache.py      |  2 +-
 lib/bb/codeparser.py | 14 ++++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

Comments

Joshua Watt Aug. 28, 2024, 4:35 p.m. UTC | #1
On Wed, Aug 28, 2024 at 6:06 AM Richard Purdie via
lists.openembedded.org
<richard.purdie=linuxfoundation.org@lists.openembedded.org> wrote:
>
> Allow the metadata to define code visitor expressions which mean that
> custom dependencies can be handled in function libraries.
>
> An example is the qa.handle_error function in OE which can set something
> like:
>
> handle_error.visitorcode = """
> if isinstance(args[0], ast.Constant) and isinstance(args[0].value, str):
>     for i in ["ERROR_QA", "WARN_QA"]:
>         if i not in self.contains:
>             self.contains[i] = set()
>     self.contains[i].add(args[0].value)
> else:
>     self.warn(node.func, args[0])
>     self.execs.add(name)
> """

Open ended code feels like it could pretty easily get us in trouble
(plus, you have to compile it to boot, so you don't know if it's bad
until then and have to deal with that error). Can a function be used
instead? Something like:

def handle_error_visitor(... some args ...)
   ... return something useful

handle_error.visitorcode = handle_error_visitor

The other advantage I can see of doing this is that you can be picky
about the passed arguments and the return value to put some limitations
(i.e. define a clear API) on what you allow a visitor to actually do, and
you don't have to worry about visitor code squatting on or corrupting
variable names, etc.



>
> Meaning that it can have contains optimisations on ERROR and WARN_QA
> instead of hard dependencies.
>
> One drawback to this solution is the parsing order. Functions with
> visitorcode need to be defined before anything else references them
> or the visitor code will not function for the earlier references.
>
> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
> ---
>  lib/bb/cache.py      |  2 +-
>  lib/bb/codeparser.py | 14 ++++++++++----
>  2 files changed, 11 insertions(+), 5 deletions(-)
>
> diff --git a/lib/bb/cache.py b/lib/bb/cache.py
> index c48feb7138..958652e0e3 100644
> --- a/lib/bb/cache.py
> +++ b/lib/bb/cache.py
> @@ -28,7 +28,7 @@ import shutil
>
>  logger = logging.getLogger("BitBake.Cache")
>
> -__cache_version__ = "155"
> +__cache_version__ = "156"
>
>  def getCacheFile(path, filename, mc, data_hash):
>      mcspec = ''
> diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py
> index b25a2133d2..01134edd4e 100644
> --- a/lib/bb/codeparser.py
> +++ b/lib/bb/codeparser.py
> @@ -87,14 +87,17 @@ def add_module_functions(fn, functions, namespace):
>              if e in functions:
>                  execs.remove(e)
>                  execs.add(namespace + "." + e)
> -        modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy(), parser.extra]
> +        visitorcode = None
> +        if hasattr(functions[f], 'visitorcode'):
> +            visitorcode = compile(getattr(functions[f], "visitorcode"), f + ".visitorcode", 'exec')
> +        modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy(), parser.extra, visitorcode]
>          #bb.warn("%s: %s\nRefs:%s Execs: %s %s %s" % (name, fn, parser.references, parser.execs, parser.var_execs, parser.contains))
>
>  def update_module_dependencies(d):
>      for mod in modulecode_deps:
>          excludes = set((d.getVarFlag(mod, "vardepsexclude") or "").split())
>          if excludes:
> -            modulecode_deps[mod] = [modulecode_deps[mod][0] - excludes, modulecode_deps[mod][1] - excludes, modulecode_deps[mod][2] - excludes, modulecode_deps[mod][3], modulecode_deps[mod][4]]
> +            modulecode_deps[mod] = [modulecode_deps[mod][0] - excludes, modulecode_deps[mod][1] - excludes, modulecode_deps[mod][2] - excludes, modulecode_deps[mod][3], modulecode_deps[mod][4], modulecode_deps[mod][5]]
>
>  # A custom getstate/setstate using tuples is actually worth 15% cachesize by
>  # avoiding duplication of the attribute names!
> @@ -161,7 +164,7 @@ class CodeParserCache(MultiProcessCache):
>      # so that an existing cache gets invalidated. Additionally you'll need
>      # to increment __cache_version__ in cache.py in order to ensure that old
>      # recipe caches don't trigger "Taskhash mismatch" errors.
> -    CACHE_VERSION = 12
> +    CACHE_VERSION = 14
>
>      def __init__(self):
>          MultiProcessCache.__init__(self)
> @@ -261,7 +264,10 @@ class PythonParser():
>
>      def visit_Call(self, node):
>          name = self.called_node_name(node.func)
> -        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
> +        if name and name in modulecode_deps and modulecode_deps[name][5]:
> +            args = node.args
> +            exec(modulecode_deps[name][5])
> +        elif name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
>              if isinstance(node.args[0], ast.Constant) and isinstance(node.args[0].value, str):
>                  varname = node.args[0].value
>                  if name in self.containsfuncs and isinstance(node.args[1], ast.Constant):
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#16523): https://lists.openembedded.org/g/bitbake-devel/message/16523
> Mute This Topic: https://lists.openembedded.org/mt/108140370/3616693
> Group Owner: bitbake-devel+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [JPEWhacker@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
Richard Purdie Aug. 28, 2024, 4:56 p.m. UTC | #2
On Wed, 2024-08-28 at 10:35 -0600, Joshua Watt wrote:
> On Wed, Aug 28, 2024 at 6:06 AM Richard Purdie via
> lists.openembedded.org
> <richard.purdie=linuxfoundation.org@lists.openembedded.org> wrote:
> > 
> > Allow the metadata to define code visitor expressions which mean
> > that
> > custom dependencies can be handled in function libraries.
> > 
> > An example is the qa.handle_error function in OE which can set
> > something
> > like:
> > 
> > handle_error.visitorcode = """
> > if isinstance(args[0], ast.Constant) and isinstance(args[0].value,
> > str):
> >     for i in ["ERROR_QA", "WARN_QA"]:
> >         if i not in self.contains:
> >             self.contains[i] = set()
> >     self.contains[i].add(args[0].value)
> > else:
> >     self.warn(node.func, args[0])
> >     self.execs.add(name)
> > """
> 
> Open ended code feels like it could pretty easily get us in trouble
> (plus, you have to compile it to boot, so you don't know if it's bad
> until then and have to deal with that error). Can a function be used
> instead? Something like:
> 
> def handle_error_visitor(... some argos...)
>    ... return some thing useful
> 
> handle_error.visitorcode = handle_error_visitor
> 
> The other advantage I can see of doing this is that you can picky
> about the passed arguments and the return code to put some
> limitations (i.e define a clear API) on what you allow a visitor to
> actual do, and you don't have to worry about visitor code
> squatting/corrupting variable names, etc.

I did try that but I was struggling to get things to work, partly because
getting into the right scope to resolve the function names was
annoyingly tricky with the current code structure. Now that I have
the above working (which wasn't as simple as it might appear), I should
perhaps revisit using a function, as I was probably in the wrong place
to get it working previously...

Cheers,

Richard
diff mbox series

Patch

diff --git a/lib/bb/cache.py b/lib/bb/cache.py
index c48feb7138..958652e0e3 100644
--- a/lib/bb/cache.py
+++ b/lib/bb/cache.py
@@ -28,7 +28,7 @@  import shutil
 
 logger = logging.getLogger("BitBake.Cache")
 
-__cache_version__ = "155"
+__cache_version__ = "156"
 
 def getCacheFile(path, filename, mc, data_hash):
     mcspec = ''
diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py
index b25a2133d2..01134edd4e 100644
--- a/lib/bb/codeparser.py
+++ b/lib/bb/codeparser.py
@@ -87,14 +87,17 @@  def add_module_functions(fn, functions, namespace):
             if e in functions:
                 execs.remove(e)
                 execs.add(namespace + "." + e)
-        modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy(), parser.extra]
+        visitorcode = None
+        if hasattr(functions[f], 'visitorcode'):
+            visitorcode = compile(getattr(functions[f], "visitorcode"), f + ".visitorcode", 'exec')
+        modulecode_deps[name] = [parser.references.copy(), execs, parser.var_execs.copy(), parser.contains.copy(), parser.extra, visitorcode]
         #bb.warn("%s: %s\nRefs:%s Execs: %s %s %s" % (name, fn, parser.references, parser.execs, parser.var_execs, parser.contains))
 
 def update_module_dependencies(d):
     for mod in modulecode_deps:
         excludes = set((d.getVarFlag(mod, "vardepsexclude") or "").split())
         if excludes:
-            modulecode_deps[mod] = [modulecode_deps[mod][0] - excludes, modulecode_deps[mod][1] - excludes, modulecode_deps[mod][2] - excludes, modulecode_deps[mod][3], modulecode_deps[mod][4]]
+            modulecode_deps[mod] = [modulecode_deps[mod][0] - excludes, modulecode_deps[mod][1] - excludes, modulecode_deps[mod][2] - excludes, modulecode_deps[mod][3], modulecode_deps[mod][4], modulecode_deps[mod][5]]
 
 # A custom getstate/setstate using tuples is actually worth 15% cachesize by
 # avoiding duplication of the attribute names!
@@ -161,7 +164,7 @@  class CodeParserCache(MultiProcessCache):
     # so that an existing cache gets invalidated. Additionally you'll need
     # to increment __cache_version__ in cache.py in order to ensure that old
     # recipe caches don't trigger "Taskhash mismatch" errors.
-    CACHE_VERSION = 12
+    CACHE_VERSION = 14
 
     def __init__(self):
         MultiProcessCache.__init__(self)
@@ -261,7 +264,10 @@  class PythonParser():
 
     def visit_Call(self, node):
         name = self.called_node_name(node.func)
-        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
+        if name and name in modulecode_deps and modulecode_deps[name][5]:
+            args = node.args
+            exec(modulecode_deps[name][5])
+        elif name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
             if isinstance(node.args[0], ast.Constant) and isinstance(node.args[0].value, str):
                 varname = node.args[0].value
                 if name in self.containsfuncs and isinstance(node.args[1], ast.Constant):