@@ -78,25 +78,35 @@ def mirror_processor(mirrordir):
time.sleep(30*60) # 30 minutes
return
-#Check to see if this is running already. If so, kill it and rerun
+# Check to see if this is running already. If so, kill it and rerun
if os.path.exists(tmpfile) and os.path.isfile(tmpfile):
print("A prior PID file exists. Attempting to kill.")
with open(tmpfile, 'r') as f:
- pid=f.readline()
+ pid_str = f.readline().strip()
try:
- os.kill(int(pid), signal.SIGKILL)
- # We need to sleep for a second or two just to give the SIGKILL time
- time.sleep(2)
- except OSError as ex:
- print("""We weren't able to kill the prior buildworker-janitor. Trying again.""")
- pass
- # Check if the process that we killed is alive.
- try:
- os.kill(int(pid), 0)
- except OSError as ex:
- pass
+        pid = int(pid_str)
+        if pid <= 0:  # kill(2) treats pid <= 0 as a process-group selector, never one process
+            raise ValueError("non-positive PID %d" % pid)
+    except ValueError:
+        print("PID file contains invalid data, ignoring.")
+        pid = None
+    if pid is not None:
+        try:
+            os.kill(pid, signal.SIGTERM)  # no separate probe first: it would race with the process exiting
+        except OSError:
+            print("Prior process (PID %d) is not running, skipping kill." % pid)
+            pid = None
+        else:
+            for _ in range(10):  # poll once per second, for at most 10 seconds
+                time.sleep(1)
+                try:
+                    os.kill(pid, 0)  # signal 0 only checks that the PID can still be signalled
+                except OSError:
+                    break
+            else:
+                print("Process %d did not exit after SIGTERM, giving up." % pid)
elif os.path.exists(tmpfile) and not os.path.isfile(tmpfile):
- raise Exception("""/tmp/.buildworker-janitor is a director. remove it to continue.""")
+ raise Exception("/tmp/.buildworker-janitor is a directory. remove it to continue.")
try:
os.unlink(tmpfile)
except:
Prevent stuck ab-janitor.service units by handling bad pidfile data.
Improve the exit logic for existing processes.

Signed-off-by: Michael Halstead <mhalstead@linuxfoundation.org>
---
 janitor/ab-janitor | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)