diff mbox series

[2/2] process/cooker/command: Fix currentAsyncCommand locking/races

Message ID 20230110160302.1470163-2-richard.purdie@linuxfoundation.org
State Accepted, archived
Commit b5215887d2f8ea3f28f1ebda721bd5b8f93ec7f3
Headers show
Series [1/2] cooker/command: Drop async command handler indirection via cooker | expand

Commit Message

Richard Purdie Jan. 10, 2023, 4:03 p.m. UTC
currentAsyncCommand currently doesn't have any locking and we have
a conflict in "idle" conditions since the idle functions count needs
to be zero *and* there needs to be no active command.

Move the changes/checks of currentAsyncCommand to within the lock
and then we can add it to the condition for idle, simplifying some
of the code.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
 lib/bb/command.py        | 28 ++++++++++++++--------------
 lib/bb/cooker.py         | 12 ++++++++----
 lib/bb/server/process.py | 19 +++++++++++++------
 3 files changed, 35 insertions(+), 24 deletions(-)

Comments

Joshua Watt Jan. 10, 2023, 4:11 p.m. UTC | #1
On Tue, Jan 10, 2023 at 10:03 AM Richard Purdie
<richard.purdie@linuxfoundation.org> wrote:
>
> currentAsyncCommand currently doesn't have any locking and we have
> a conflict in "idle" conditions since the idle functions count needs
> to be zero *and* there needs to be no active command.
>
> Move the changes/checks of currentAsyncCommand to within the lock
> and then we can add it to the condition for idle, simplifying some
> of the code.
>
> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
> ---
>  lib/bb/command.py        | 28 ++++++++++++++--------------
>  lib/bb/cooker.py         | 12 ++++++++----
>  lib/bb/server/process.py | 19 +++++++++++++------
>  3 files changed, 35 insertions(+), 24 deletions(-)
>
> diff --git a/lib/bb/command.py b/lib/bb/command.py
> index c325abbcda..c3dc872d0d 100644
> --- a/lib/bb/command.py
> +++ b/lib/bb/command.py
> @@ -51,13 +51,14 @@ class Command:
>      """
>      A queue of asynchronous commands for bitbake
>      """
> -    def __init__(self, cooker):
> +    def __init__(self, cooker, process_server):
>          self.cooker = cooker
>          self.cmds_sync = CommandsSync()
>          self.cmds_async = CommandsAsync()
>          self.remotedatastores = None
>
> -        # FIXME Add lock for this
> +        self.process_server = process_server
> +        # Access with locking using process_server.get_async_cmd()/set_async_cmd()
>          self.currentAsyncCommand = None
>          self.lastEvent = None
>
> @@ -100,18 +101,16 @@ class Command:
>                  return None, traceback.format_exc()
>              else:
>                  return result, None
> -        if self.currentAsyncCommand is not None:
> -            # Wait for the idle loop to have cleared (30s max)
> -            process_server.wait_for_idle(timeout=30)
> -            if self.currentAsyncCommand is not None:
> -                return None, "Busy (%s in progress)" % self.currentAsyncCommand[0]
> +        # Wait for the idle loop to have cleared (30s max)
> +        if not self.process_server.wait_for_idle(timeout=30):
> +            return None, "Busy (%s in progress)" % self.process_server.get_async_cmd()[0]
>          if command not in CommandsAsync.__dict__:
>              return None, "No such command"
> -        self.currentAsyncCommand = (command, commandline)
> -        self.cooker.idleCallBackRegister(self.runAsyncCommand, None)
> +        process_server.set_async_cmd((command, commandline))

You have a TOCTOU bug here; your wait_for_idle() should be atomic with
the set_async_cmd().

In my PoC, I split apart set_async_cmd() from clear_async_cmd():

 def set_async_cmd(self, cmd):
   with self.idle_cond:
     self.idle_cond.wait_for(self._idle_check)
     self.cooker.command.currentAsynCommand = cmd

 def clear_async_cmd(self):
   with bb.utils.lock_timeout(self._idlefuncsLock):
     self.cooker.command.currentAsyncCommand = None
     self.idle_cond.notify_all()

> +        self.cooker.idleCallBackRegister(self.runAsyncCommand, process_server)
>          return True, None
>
> -    def runAsyncCommand(self, _, _2, halt):
> +    def runAsyncCommand(self, _, process_server, halt):
>          try:
>              self.cooker.process_inotify_updates_apply()
>              if self.cooker.state in (bb.cooker.state.error, bb.cooker.state.shutdown, bb.cooker.state.forceshutdown):
> @@ -119,8 +118,9 @@ class Command:
>                  # and then raise BBHandledException triggering an exit
>                  self.cooker.updateCache()
>                  return bb.server.process.idleFinish("Cooker in error state")
> -            if self.currentAsyncCommand is not None:
> -                (command, options) = self.currentAsyncCommand
> +            cmd = process_server.get_async_cmd()
> +            if cmd is not None:
> +                (command, options) = cmd
>                  commandmethod = getattr(CommandsAsync, command)
>                  needcache = getattr( commandmethod, "needcache" )
>                  if needcache and self.cooker.state != bb.cooker.state.running:
> @@ -156,7 +156,7 @@ class Command:
>          bb.event.fire(event, self.cooker.data)
>          self.lastEvent = event
>          self.cooker.finishcommand()
> -        self.currentAsyncCommand = None
> +        self.process_server.set_async_cmd(None)
>
>      def reset(self):
>          if self.remotedatastores:
> @@ -749,7 +749,7 @@ class CommandsAsync:
>          """
>          event = params[0]
>          bb.event.fire(eval(event), command.cooker.data)
> -        command.currentAsyncCommand = None
> +        process_server.set_async_cmd(None)
>      triggerEvent.needcache = False
>
>      def resetCooker(self, command, params):
> diff --git a/lib/bb/cooker.py b/lib/bb/cooker.py
> index 13d6e9d847..5a0e675b44 100644
> --- a/lib/bb/cooker.py
> +++ b/lib/bb/cooker.py
> @@ -149,7 +149,7 @@ class BBCooker:
>      Manages one bitbake build run
>      """
>
> -    def __init__(self, featureSet=None, idleCallBackRegister=None, waitIdle=None):
> +    def __init__(self, featureSet=None, server=None):
>          self.recipecaches = None
>          self.eventlog = None
>          self.skiplist = {}
> @@ -163,8 +163,12 @@ class BBCooker:
>
>          self.configuration = bb.cookerdata.CookerConfiguration()
>
> -        self.idleCallBackRegister = idleCallBackRegister
> -        self.waitIdle = waitIdle
> +        self.process_server = server
> +        self.idleCallBackRegister = None
> +        self.waitIdle = None
> +        if server:
> +            self.idleCallBackRegister = server.register_idle_function
> +            self.waitIdle = server.wait_for_idle
>
>          bb.debug(1, "BBCooker starting %s" % time.time())
>          sys.stdout.flush()
> @@ -203,7 +207,7 @@ class BBCooker:
>          except UnsupportedOperation:
>              pass
>
> -        self.command = bb.command.Command(self)
> +        self.command = bb.command.Command(self, self.process_server)
>          self.state = state.initial
>
>          self.parser = None
> diff --git a/lib/bb/server/process.py b/lib/bb/server/process.py
> index 4422fd7311..36c109ffd3 100644
> --- a/lib/bb/server/process.py
> +++ b/lib/bb/server/process.py
> @@ -158,7 +158,7 @@ class ProcessServer():
>      def wait_for_idle(self, timeout=30):
>          # Wait for the idle loop to have cleared
>          with self.idle_cond:
> -            self.idle_cond.wait_for(lambda: len(self._idlefuns) == 0, timeout)
> +            return self.idle_cond.wait_for(lambda: len(self._idlefuns) == 0 and self.cooker.command.currentAsyncCommand is None, timeout) is not False
>
>      def main(self):
>          self.cooker.pre_serve()
> @@ -184,11 +184,9 @@ class ProcessServer():
>                  self.controllersock = False
>              if self.haveui:
>                  # Wait for the idle loop to have cleared (30s max)
> -                self.wait_for_idle(30)
> -                if self.cooker.command.currentAsyncCommand is not None:
> +                if not self.wait_for_idle(30):
>                      serverlog("Idle loop didn't finish queued commands after 30s, exiting.")
>                      self.quit = True
> -
>                  fds.remove(self.command_channel)
>                  bb.event.unregister_UIHhandler(self.event_handle, True)
>                  self.command_channel_reply.writer.close()
> @@ -377,6 +375,15 @@ class ProcessServer():
>                      msg.append(":\n%s" % procs)
>                  serverlog("".join(msg))
>
> +    def get_async_cmd(self):
> +        with bb.utils.lock_timeout(self._idlefuncsLock):
> +            return self.cooker.command.currentAsyncCommand
> +
> +    def set_async_cmd(self, cmd):
> +        with bb.utils.lock_timeout(self._idlefuncsLock):
> +            self.cooker.command.currentAsyncCommand = cmd
> +            self.idle_cond.notify_all()
> +
>      def idle_thread(self):
>          def remove_idle_func(function):
>              with bb.utils.lock_timeout(self._idlefuncsLock):
> @@ -427,7 +434,7 @@ class ProcessServer():
>                  lastdebug = time.time()
>                  with bb.utils.lock_timeout(self._idlefuncsLock):
>                      num_funcs = len(self._idlefuns)
> -                serverlog("Current command %s, idle functions %s, last exit event %s, state %s" % (self.cooker.command.currentAsyncCommand, num_funcs, self.cooker.command.lastEvent, self.cooker.state))
> +                serverlog("Current command %s, idle functions %s, last exit event %s, state %s" % (self.get_async_cmd(), num_funcs, self.cooker.command.lastEvent, self.cooker.state))
>
>              if nextsleep is not None:
>                  select.select(fds,[],[],nextsleep)[0]
> @@ -632,7 +639,7 @@ def execServer(lockfd, readypipeinfd, lockname, sockname, server_timeout, xmlrpc
>          writer = ConnectionWriter(readypipeinfd)
>          try:
>              featureset = []
> -            cooker = bb.cooker.BBCooker(featureset, server.register_idle_function, server.wait_for_idle)
> +            cooker = bb.cooker.BBCooker(featureset, server)
>              cooker.configuration.profile = profile
>          except bb.BBHandledException:
>              return None
> --
> 2.37.2
>
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#14296): https://lists.openembedded.org/g/bitbake-devel/message/14296
> Mute This Topic: https://lists.openembedded.org/mt/96179934/3616693
> Group Owner: bitbake-devel+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [JPEWhacker@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
Richard Purdie Jan. 10, 2023, 4:35 p.m. UTC | #2
On Tue, 2023-01-10 at 10:11 -0600, Joshua Watt wrote:
> On Tue, Jan 10, 2023 at 10:03 AM Richard Purdie
> <richard.purdie@linuxfoundation.org> wrote:
> > 
> > currentAsyncCommand currently doesn't have any locking and we have
> > a conflict in "idle" conditions since the idle functions count needs
> > to be zero *and* there needs to be no active command.
> > 
> > Move the changes/checks of currentAsyncCommand to within the lock
> > and then we can add it to the condition for idle, simplifying some
> > of the code.
> > 
> > Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
> > ---
> >  lib/bb/command.py        | 28 ++++++++++++++--------------
> >  lib/bb/cooker.py         | 12 ++++++++----
> >  lib/bb/server/process.py | 19 +++++++++++++------
> >  3 files changed, 35 insertions(+), 24 deletions(-)
> > 
> > diff --git a/lib/bb/command.py b/lib/bb/command.py
> > index c325abbcda..c3dc872d0d 100644
> > --- a/lib/bb/command.py
> > +++ b/lib/bb/command.py
> > @@ -51,13 +51,14 @@ class Command:
> >      """
> >      A queue of asynchronous commands for bitbake
> >      """
> > -    def __init__(self, cooker):
> > +    def __init__(self, cooker, process_server):
> >          self.cooker = cooker
> >          self.cmds_sync = CommandsSync()
> >          self.cmds_async = CommandsAsync()
> >          self.remotedatastores = None
> > 
> > -        # FIXME Add lock for this
> > +        self.process_server = process_server
> > +        # Access with locking using process_server.get_async_cmd()/set_async_cmd()
> >          self.currentAsyncCommand = None
> >          self.lastEvent = None
> > 
> > @@ -100,18 +101,16 @@ class Command:
> >                  return None, traceback.format_exc()
> >              else:
> >                  return result, None
> > -        if self.currentAsyncCommand is not None:
> > -            # Wait for the idle loop to have cleared (30s max)
> > -            process_server.wait_for_idle(timeout=30)
> > -            if self.currentAsyncCommand is not None:
> > -                return None, "Busy (%s in progress)" % self.currentAsyncCommand[0]
> > +        # Wait for the idle loop to have cleared (30s max)
> > +        if not self.process_server.wait_for_idle(timeout=30):
> > +            return None, "Busy (%s in progress)" % self.process_server.get_async_cmd()[0]
> >          if command not in CommandsAsync.__dict__:
> >              return None, "No such command"
> > -        self.currentAsyncCommand = (command, commandline)
> > -        self.cooker.idleCallBackRegister(self.runAsyncCommand, None)
> > +        process_server.set_async_cmd((command, commandline))
> 
> You have a TOCTOU bug here; your wait_for_idle() should be atomic with
> the set_async_cmd().
> 
> In my PoC, I split apart set_async_cmd() from clear_async_cmd():
> 
>  def set_async_cmd(self, cmd):
>    with self.idle_cond:
>      self.idle_cond.wait_for(self._idle_check)
>      self.cooker.command.currentAsynCommand = cmd
> 
>  def clear_async_cmd(self):
>    with bb.utils.lock_timeout(self._idlefuncsLock):
>      self.cooker.command.currentAsyncCommand = None
>      self.idle_cond.notify_all()
> 
> 

Right, it is definitely a TOCTOU bug in theory. In practise since we
only have two threads and only one of them ever sets this, I don't
think it can break with the current code.

We should really fix it. I might do it in a later patch, not sure. How
to combine this nicely with the "return busy" piece isn't entirely
trivial.

Cheers,

Richard
diff mbox series

Patch

diff --git a/lib/bb/command.py b/lib/bb/command.py
index c325abbcda..c3dc872d0d 100644
--- a/lib/bb/command.py
+++ b/lib/bb/command.py
@@ -51,13 +51,14 @@  class Command:
     """
     A queue of asynchronous commands for bitbake
     """
-    def __init__(self, cooker):
+    def __init__(self, cooker, process_server):
         self.cooker = cooker
         self.cmds_sync = CommandsSync()
         self.cmds_async = CommandsAsync()
         self.remotedatastores = None
 
-        # FIXME Add lock for this
+        self.process_server = process_server
+        # Access with locking using process_server.get_async_cmd()/set_async_cmd()
         self.currentAsyncCommand = None
         self.lastEvent = None
 
@@ -100,18 +101,16 @@  class Command:
                 return None, traceback.format_exc()
             else:
                 return result, None
-        if self.currentAsyncCommand is not None:
-            # Wait for the idle loop to have cleared (30s max)
-            process_server.wait_for_idle(timeout=30)
-            if self.currentAsyncCommand is not None:
-                return None, "Busy (%s in progress)" % self.currentAsyncCommand[0]
+        # Wait for the idle loop to have cleared (30s max)
+        if not self.process_server.wait_for_idle(timeout=30):
+            return None, "Busy (%s in progress)" % self.process_server.get_async_cmd()[0]
         if command not in CommandsAsync.__dict__:
             return None, "No such command"
-        self.currentAsyncCommand = (command, commandline)
-        self.cooker.idleCallBackRegister(self.runAsyncCommand, None)
+        process_server.set_async_cmd((command, commandline))
+        self.cooker.idleCallBackRegister(self.runAsyncCommand, process_server)
         return True, None
 
-    def runAsyncCommand(self, _, _2, halt):
+    def runAsyncCommand(self, _, process_server, halt):
         try:
             self.cooker.process_inotify_updates_apply()
             if self.cooker.state in (bb.cooker.state.error, bb.cooker.state.shutdown, bb.cooker.state.forceshutdown):
@@ -119,8 +118,9 @@  class Command:
                 # and then raise BBHandledException triggering an exit
                 self.cooker.updateCache()
                 return bb.server.process.idleFinish("Cooker in error state")
-            if self.currentAsyncCommand is not None:
-                (command, options) = self.currentAsyncCommand
+            cmd = process_server.get_async_cmd()
+            if cmd is not None:
+                (command, options) = cmd
                 commandmethod = getattr(CommandsAsync, command)
                 needcache = getattr( commandmethod, "needcache" )
                 if needcache and self.cooker.state != bb.cooker.state.running:
@@ -156,7 +156,7 @@  class Command:
         bb.event.fire(event, self.cooker.data)
         self.lastEvent = event
         self.cooker.finishcommand()
-        self.currentAsyncCommand = None
+        self.process_server.set_async_cmd(None)
 
     def reset(self):
         if self.remotedatastores:
@@ -749,7 +749,7 @@  class CommandsAsync:
         """
         event = params[0]
         bb.event.fire(eval(event), command.cooker.data)
-        command.currentAsyncCommand = None
+        process_server.set_async_cmd(None)
     triggerEvent.needcache = False
 
     def resetCooker(self, command, params):
diff --git a/lib/bb/cooker.py b/lib/bb/cooker.py
index 13d6e9d847..5a0e675b44 100644
--- a/lib/bb/cooker.py
+++ b/lib/bb/cooker.py
@@ -149,7 +149,7 @@  class BBCooker:
     Manages one bitbake build run
     """
 
-    def __init__(self, featureSet=None, idleCallBackRegister=None, waitIdle=None):
+    def __init__(self, featureSet=None, server=None):
         self.recipecaches = None
         self.eventlog = None
         self.skiplist = {}
@@ -163,8 +163,12 @@  class BBCooker:
 
         self.configuration = bb.cookerdata.CookerConfiguration()
 
-        self.idleCallBackRegister = idleCallBackRegister
-        self.waitIdle = waitIdle
+        self.process_server = server
+        self.idleCallBackRegister = None
+        self.waitIdle = None
+        if server:
+            self.idleCallBackRegister = server.register_idle_function
+            self.waitIdle = server.wait_for_idle
 
         bb.debug(1, "BBCooker starting %s" % time.time())
         sys.stdout.flush()
@@ -203,7 +207,7 @@  class BBCooker:
         except UnsupportedOperation:
             pass
 
-        self.command = bb.command.Command(self)
+        self.command = bb.command.Command(self, self.process_server)
         self.state = state.initial
 
         self.parser = None
diff --git a/lib/bb/server/process.py b/lib/bb/server/process.py
index 4422fd7311..36c109ffd3 100644
--- a/lib/bb/server/process.py
+++ b/lib/bb/server/process.py
@@ -158,7 +158,7 @@  class ProcessServer():
     def wait_for_idle(self, timeout=30):
         # Wait for the idle loop to have cleared
         with self.idle_cond:
-            self.idle_cond.wait_for(lambda: len(self._idlefuns) == 0, timeout)
+            return self.idle_cond.wait_for(lambda: len(self._idlefuns) == 0 and self.cooker.command.currentAsyncCommand is None, timeout) is not False
 
     def main(self):
         self.cooker.pre_serve()
@@ -184,11 +184,9 @@  class ProcessServer():
                 self.controllersock = False
             if self.haveui:
                 # Wait for the idle loop to have cleared (30s max)
-                self.wait_for_idle(30)
-                if self.cooker.command.currentAsyncCommand is not None:
+                if not self.wait_for_idle(30):
                     serverlog("Idle loop didn't finish queued commands after 30s, exiting.")
                     self.quit = True
-
                 fds.remove(self.command_channel)
                 bb.event.unregister_UIHhandler(self.event_handle, True)
                 self.command_channel_reply.writer.close()
@@ -377,6 +375,15 @@  class ProcessServer():
                     msg.append(":\n%s" % procs)
                 serverlog("".join(msg))
 
+    def get_async_cmd(self):
+        with bb.utils.lock_timeout(self._idlefuncsLock):
+            return self.cooker.command.currentAsyncCommand
+
+    def set_async_cmd(self, cmd):
+        with bb.utils.lock_timeout(self._idlefuncsLock):
+            self.cooker.command.currentAsyncCommand = cmd
+            self.idle_cond.notify_all()
+
     def idle_thread(self):
         def remove_idle_func(function):
             with bb.utils.lock_timeout(self._idlefuncsLock):
@@ -427,7 +434,7 @@  class ProcessServer():
                 lastdebug = time.time()
                 with bb.utils.lock_timeout(self._idlefuncsLock):
                     num_funcs = len(self._idlefuns)
-                serverlog("Current command %s, idle functions %s, last exit event %s, state %s" % (self.cooker.command.currentAsyncCommand, num_funcs, self.cooker.command.lastEvent, self.cooker.state))
+                serverlog("Current command %s, idle functions %s, last exit event %s, state %s" % (self.get_async_cmd(), num_funcs, self.cooker.command.lastEvent, self.cooker.state))
 
             if nextsleep is not None:
                 select.select(fds,[],[],nextsleep)[0]
@@ -632,7 +639,7 @@  def execServer(lockfd, readypipeinfd, lockname, sockname, server_timeout, xmlrpc
         writer = ConnectionWriter(readypipeinfd)
         try:
             featureset = []
-            cooker = bb.cooker.BBCooker(featureset, server.register_idle_function, server.wait_for_idle)
+            cooker = bb.cooker.BBCooker(featureset, server)
             cooker.configuration.profile = profile
         except bb.BBHandledException:
             return None