diff mbox series

codeparser: Fix handling of string AST nodes with older Python versions

Message ID 20241030091721.1598782-1-philip.lorenz@bmw.de
State New
Headers show
Series codeparser: Fix handling of string AST nodes with older Python versions | expand

Commit Message

Philip Lorenz Oct. 30, 2024, 9:17 a.m. UTC
Commits 4591011449212c8e494ea42348acb2d27a82a51b and
6c19b6cf105ac321ec89da1a876a317020c45ab7 unconditionally changed
codeparser to rely on CPython 3.8 semantics. However, kirkstone
continues to support CPython versions >= 3.6.0 and as such string AST
nodes were no longer correctly identified.

Fix this by continuing to use `ast.Str` for Python versions < 3.8.0 and
only using the new code path for more recent versions. Detecting which
version of the AST API to use seems to be non-trivial so the Python
feature version is used instead.

Instances of this issue can be identified when executing bitbake with
debug logging:

    while parsing MACHINE_ARCH, in call of d.getVar, argument
        ''TUNE_PKGARCH'' is not a string literal

As a consequence of these parsing issues, bitbake may assume that task
inputs haven't changed and as such erroneously reuse sstate objects when
it shouldn't.

Signed-off-by: Philip Lorenz <philip.lorenz@bmw.de>
---
 lib/bb/codeparser.py | 46 +++++++++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 13 deletions(-)

Comments

Alexander Kanavin Oct. 30, 2024, 9:38 a.m. UTC | #1
I guess this is only aimed for kirkstone? If so, you need to put
[kirkstone] in the subject.

Alex

On Wed, 30 Oct 2024 at 10:33, Philip Lorenz via lists.openembedded.org
<philip.lorenz=bmw.de@lists.openembedded.org> wrote:
>
> Commits 4591011449212c8e494ea42348acb2d27a82a51b and
> 6c19b6cf105ac321ec89da1a876a317020c45ab7 unconditionally changed
> codeparser to rely on CPython 3.8 semantics. However, kirkstone
> continues to support CPython versions >= 3.6.0 and as such string AST
> nodes were no longer correctly identified.
>
> Fix this by continuing to use `ast.Str` for Python versions < 3.8.0 and
> only using the new code path for more recent versions. Detecting which
> version of the AST API to use seems to be non-trivial so the Python
> feature version is used instead.
>
> Instances of this issue can be identified when executing bitbake with
> debug logging:
>
>     while parsing MACHINE_ARCH, in call of d.getVar, argument
>         ''TUNE_PKGARCH'' is not a string literal
>
> As a consequence of these parsing issues, bitbake may assume that task
> inputs haven't changed and as such erroneously reuse sstate objects when
> it shouldn't.
>
> Signed-off-by: Philip Lorenz <philip.lorenz@bmw.de>
> ---
>  lib/bb/codeparser.py | 46 +++++++++++++++++++++++++++++++-------------
>  1 file changed, 33 insertions(+), 13 deletions(-)
>
> diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py
> index 6ce0c5182..39dba266c 100644
> --- a/lib/bb/codeparser.py
> +++ b/lib/bb/codeparser.py
> @@ -201,6 +201,22 @@ class DummyLogger():
>      def flush(self):
>          return
>
> +
> +# Starting with Python 3.8, the ast module exposes all string nodes as a
> +# Constant. While earlier versions of the module also have the Constant type
> +# those use the Str type to encapsulate strings.
> +if sys.version_info < (3, 8):
> +    def node_str_value(node):
> +        if isinstance(node, ast.Str):
> +            return node.s
> +        return None
> +else:
> +    def node_str_value(node):
> +        if isinstance(node, ast.Constant) and isinstance(node.value, str):
> +            return node.value
> +        return None
> +
> +
>  class PythonParser():
>      getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
>      getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
> @@ -225,19 +241,22 @@ class PythonParser():
>      def visit_Call(self, node):
>          name = self.called_node_name(node.func)
>          if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
> -            if isinstance(node.args[0], ast.Constant) and isinstance(node.args[0].value, str):
> -                varname = node.args[0].value
> -                if name in self.containsfuncs and isinstance(node.args[1], ast.Constant):
> +            varname = node_str_value(node.args[0])
> +            if varname is not None:
> +                arg_str_value = None
> +                if len(node.args) >= 2:
> +                    arg_str_value = node_str_value(node.args[1])
> +                if name in self.containsfuncs and arg_str_value is not None:
>                      if varname not in self.contains:
>                          self.contains[varname] = set()
> -                    self.contains[varname].add(node.args[1].value)
> -                elif name in self.containsanyfuncs and isinstance(node.args[1], ast.Constant):
> +                    self.contains[varname].add(arg_str_value)
> +                elif name in self.containsanyfuncs and arg_str_value is not None:
>                      if varname not in self.contains:
>                          self.contains[varname] = set()
> -                    self.contains[varname].update(node.args[1].value.split())
> +                    self.contains[varname].update(arg_str_value.split())
>                  elif name.endswith(self.getvarflags):
> -                    if isinstance(node.args[1], ast.Constant):
> -                        self.references.add('%s[%s]' % (varname, node.args[1].value))
> +                    if arg_str_value is not None:
> +                        self.references.add('%s[%s]' % (varname, arg_str_value))
>                      else:
>                          self.warn(node.func, node.args[1])
>                  else:
> @@ -245,10 +264,10 @@ class PythonParser():
>              else:
>                  self.warn(node.func, node.args[0])
>          elif name and name.endswith(".expand"):
> -            if isinstance(node.args[0], ast.Constant):
> -                value = node.args[0].value
> +            arg_str_value = node_str_value(node.args[0])
> +            if arg_str_value is not None:
>                  d = bb.data.init()
> -                parser = d.expandWithRefs(value, self.name)
> +                parser = d.expandWithRefs(arg_str_value, self.name)
>                  self.references |= parser.references
>                  self.execs |= parser.execs
>                  for varname in parser.contains:
> @@ -256,8 +275,9 @@ class PythonParser():
>                          self.contains[varname] = set()
>                      self.contains[varname] |= parser.contains[varname]
>          elif name in self.execfuncs:
> -            if isinstance(node.args[0], ast.Constant):
> -                self.var_execs.add(node.args[0].value)
> +            arg_str_value = node_str_value(node.args[0])
> +            if arg_str_value is not None:
> +                self.var_execs.add(arg_str_value)
>              else:
>                  self.warn(node.func, node.args[0])
>          elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
> --
> 2.47.0
>
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#16740): https://lists.openembedded.org/g/bitbake-devel/message/16740
> Mute This Topic: https://lists.openembedded.org/mt/109292959/1686489
> Group Owner: bitbake-devel+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [alex.kanavin@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
Philip Lorenz Oct. 30, 2024, 10:11 a.m. UTC | #2
Hi Alex,

On 30.10.24 10:38, Alexander Kanavin wrote:
> I guess this is only aimed for kirkstone? If so, you need to put
> [kirkstone] in the subject.

There seems to be some significant delay in mail delivery somewhere (I 
just received this mail 30 minutes "late"). I have in the meantime 
already resent the patch with a [2.0] prefix. I hope that's fine but can 
also resend with [kirkstone] if this is needed to make sure it shows up 
in the correct filters.

Philip
diff mbox series

Patch

diff --git a/lib/bb/codeparser.py b/lib/bb/codeparser.py
index 6ce0c5182..39dba266c 100644
--- a/lib/bb/codeparser.py
+++ b/lib/bb/codeparser.py
@@ -201,6 +201,22 @@  class DummyLogger():
     def flush(self):
         return
 
+
+# Starting with Python 3.8, the ast module exposes all string nodes as a
+# Constant. While earlier versions of the module also have the Constant type
+# those use the Str type to encapsulate strings.
+if sys.version_info < (3, 8):
+    def node_str_value(node):
+        if isinstance(node, ast.Str):
+            return node.s
+        return None
+else:
+    def node_str_value(node):
+        if isinstance(node, ast.Constant) and isinstance(node.value, str):
+            return node.value
+        return None
+
+
 class PythonParser():
     getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
     getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
@@ -225,19 +241,22 @@  class PythonParser():
     def visit_Call(self, node):
         name = self.called_node_name(node.func)
         if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
-            if isinstance(node.args[0], ast.Constant) and isinstance(node.args[0].value, str):
-                varname = node.args[0].value
-                if name in self.containsfuncs and isinstance(node.args[1], ast.Constant):
+            varname = node_str_value(node.args[0])
+            if varname is not None:
+                arg_str_value = None
+                if len(node.args) >= 2:
+                    arg_str_value = node_str_value(node.args[1])
+                if name in self.containsfuncs and arg_str_value is not None:
                     if varname not in self.contains:
                         self.contains[varname] = set()
-                    self.contains[varname].add(node.args[1].value)
-                elif name in self.containsanyfuncs and isinstance(node.args[1], ast.Constant):
+                    self.contains[varname].add(arg_str_value)
+                elif name in self.containsanyfuncs and arg_str_value is not None:
                     if varname not in self.contains:
                         self.contains[varname] = set()
-                    self.contains[varname].update(node.args[1].value.split())
+                    self.contains[varname].update(arg_str_value.split())
                 elif name.endswith(self.getvarflags):
-                    if isinstance(node.args[1], ast.Constant):
-                        self.references.add('%s[%s]' % (varname, node.args[1].value))
+                    if arg_str_value is not None:
+                        self.references.add('%s[%s]' % (varname, arg_str_value))
                     else:
                         self.warn(node.func, node.args[1])
                 else:
@@ -245,10 +264,10 @@  class PythonParser():
             else:
                 self.warn(node.func, node.args[0])
         elif name and name.endswith(".expand"):
-            if isinstance(node.args[0], ast.Constant):
-                value = node.args[0].value
+            arg_str_value = node_str_value(node.args[0])
+            if arg_str_value is not None:
                 d = bb.data.init()
-                parser = d.expandWithRefs(value, self.name)
+                parser = d.expandWithRefs(arg_str_value, self.name)
                 self.references |= parser.references
                 self.execs |= parser.execs
                 for varname in parser.contains:
@@ -256,8 +275,9 @@  class PythonParser():
                         self.contains[varname] = set()
                     self.contains[varname] |= parser.contains[varname]
         elif name in self.execfuncs:
-            if isinstance(node.args[0], ast.Constant):
-                self.var_execs.add(node.args[0].value)
+            arg_str_value = node_str_value(node.args[0])
+            if arg_str_value is not None:
+                self.var_execs.add(arg_str_value)
             else:
                 self.warn(node.func, node.args[0])
         elif name and isinstance(node.func, (ast.Name, ast.Attribute)):