diff mbox series

[yocto-autobuilder-helper,1/2] janitor/ab-janitor: handle invalid pidfile data

Message ID 20260403-contrib-halstead-ab-janitor-v1-1-91b4bb47c0d7@linuxfoundation.org
State New
Headers show
Series janitor/ab-janitor: fix reliability and security issues | expand

Commit Message

Michael Halstead April 3, 2026, 9:39 p.m. UTC
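Prevent stuck ab-janitor.service units by handling bad pidfile data:
a pidfile whose contents are not a valid integer is now reported and
ignored instead of raising an uncaught ValueError at startup.

Improve the exit logic for existing processes: check that the prior
process is actually running before signalling it, send SIGTERM instead
of SIGKILL, and poll once a second for up to 10 seconds for it to exit.
Also fix the "director" typo in the pidfile-is-a-directory error.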

Signed-off-by: Michael Halstead <mhalstead@linuxfoundation.org>
---
 janitor/ab-janitor | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/janitor/ab-janitor b/janitor/ab-janitor
index 57749d6..281d7b9 100755
--- a/janitor/ab-janitor
+++ b/janitor/ab-janitor
@@ -78,25 +78,35 @@  def mirror_processor(mirrordir):
         time.sleep(30*60) # 30 minutes
     return
 
-#Check to see if this is running already. If so, kill it and rerun
+# Check to see if this is running already. If so, kill it and rerun
 if os.path.exists(tmpfile) and os.path.isfile(tmpfile):
     print("A prior PID file exists. Attempting to kill.")
     with open(tmpfile, 'r') as f:
-        pid=f.readline()
+        pid_str = f.readline().strip()
     try:
-        os.kill(int(pid), signal.SIGKILL)
-        # We need to sleep for a second or two just to give the SIGKILL time
-        time.sleep(2)
-    except OSError as ex:
-        print("""We weren't able to kill the prior buildworker-janitor. Trying again.""")
-        pass
-    # Check if the process that we killed is alive.
-    try:
-       os.kill(int(pid), 0)
-    except OSError as ex:
-       pass
+        pid = int(pid_str)
+    except ValueError:
+        print("PID file contains invalid data, ignoring.")
+        pid = None
+    if pid is not None:
+        # Check if the process is actually running before trying to kill it
+        try:
+            os.kill(pid, 0)
+        except OSError:
+            print("Prior process (PID %d) is not running, skipping kill." % pid)
+            pid = None
+    if pid is not None:
+        os.kill(pid, signal.SIGTERM)
+        for _ in range(10):
+            time.sleep(1)
+            try:
+                os.kill(pid, 0)
+            except OSError:
+                break
+        else:
+            print("Process %d did not exit after SIGTERM, giving up." % pid)
 elif os.path.exists(tmpfile) and not os.path.isfile(tmpfile):
-    raise Exception("""/tmp/.buildworker-janitor is a director. remove it to continue.""")
+    raise Exception("/tmp/.buildworker-janitor is a directory. remove it to continue.")
 try:
     os.unlink(tmpfile)
 except: