diff mbox series

oeqa/runtime/ssh: add retry logic and sleeps to allow for slower systems

Message ID 20240624222024.731269-1-jdmason@kudzu.us
State Accepted, archived
Commit f0fe0b490d309cdf1c97754f85a61b5b948b7f28
Headers show
Series oeqa/runtime/ssh: add retry logic and sleeps to allow for slower systems | expand

Commit Message

Jon Mason June 24, 2024, 10:20 p.m. UTC
On exceptionally slow systems, the ssh test can intermittently fail due
to a race between when ping starts working and when the networking
applications are brought up.  To work around this issue, add some retry
logic when ssh fails to connect.  According to the man page of ssh, "ssh
exits with the exit status of the remote command or with 255 if an error
occurred."  So, only retry if the return code is 255, and limit the
number of retries to prevent it from looping forever.

Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 meta/lib/oeqa/runtime/cases/ssh.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)
diff mbox series

Patch

diff --git a/meta/lib/oeqa/runtime/cases/ssh.py b/meta/lib/oeqa/runtime/cases/ssh.py
index cdbef595008c..ae92bb34cd9c 100644
--- a/meta/lib/oeqa/runtime/cases/ssh.py
+++ b/meta/lib/oeqa/runtime/cases/ssh.py
@@ -4,6 +4,8 @@ 
 # SPDX-License-Identifier: MIT
 #
 
+import time
+
 from oeqa.runtime.case import OERuntimeTestCase
 from oeqa.core.decorator.depends import OETestDepends
 from oeqa.runtime.decorator.package import OEHasPackage
@@ -13,12 +15,20 @@  class SSHTest(OERuntimeTestCase):
     @OETestDepends(['ping.PingTest.test_ping'])
     @OEHasPackage(['dropbear', 'openssh-sshd'])
     def test_ssh(self):
-        (status, output) = self.target.run('sleep 20', timeout=2)
-        msg='run() timed out but return code was zero.'
-        self.assertNotEqual(status, 0, msg=msg)
-        (status, output) = self.target.run('uname -a')
-        self.assertEqual(status, 0, msg='SSH Test failed: %s' % output)
-        (status, output) = self.target.run('cat /etc/controllerimage')
-        msg = "This isn't the right image  - /etc/controllerimage " \
-              "shouldn't be here %s" % output
-        self.assertEqual(status, 1, msg=msg)
+        for i in range(5):
+          status, output = self.target.run("uname -a", timeout=5)
+          if status == 0:
+              break
+          elif status == 255:
+              # ssh returns 255 only if an ssh error occurs.  This could
+              # be an issue with "Connection refused" because the port
+              # isn't open yet, and this could check explicitly for that
+              # here.  However, let's keep it simple and just retry for
+              # all errors a limited number of times with a sleep to
+              # give it time for the port to open.
+              time.sleep(5)
+              continue
+          else:
+              self.fail("uname failed with \"%s\"" %output)
+        if status == 255:
+            self.fail("ssh error %s" %output)