Change wait_for_boot method for node availability

The current wait_for_boot method opens a socket connection to port 22
to determine whether commands can be run via SSH on a given node.
However, this is not definitive: even if a connection to port 22 can
be made, SSH may not be immediately available. A more robust approach
is to repeatedly attempt a basic command until it executes
successfully.

Change-Id: Ib80e29f947a47e1b5349236a7e9db42b59c09b10
This commit is contained in:
Ben Cooper 2017-07-21 06:50:25 +01:00 committed by Benjamin Michael Cooper
parent b0c22a2505
commit 8060628bee
3 changed files with 18 additions and 33 deletions

View File

@ -63,9 +63,9 @@ class TimeoutException(RamDiskTestException):
_msg = "Timeout expired."
class NetServiceStartTimeout(TimeoutException):
    # Timeout error whose message reports the IP, port and timeout that
    # were being waited on; the placeholders are filled from keyword
    # arguments named ``timeout``, ``ip`` and ``port``.
    _msg = (
        "Timeout %(timeout)ss for waiting for IP %(ip)s port %(port)s to "
        "start expired."
    )
class NodeSSHTimeout(TimeoutException):
    # Timeout error whose message reports the node name and the timeout
    # that was exceeded while waiting for ssh; the placeholders are
    # filled from keyword arguments named ``node_name`` and ``timeout``.
    _msg = (
        "Waiting for ssh to become available on node %(node_name)s exceeded "
        "timeout %(timeout)ss."
    )
class NodeCallbackTimeout(TimeoutException):

View File

@ -136,7 +136,21 @@ class Node(base.LibvirtBase):
def wait_for_boot(self):
    """Block until a command can be executed on the node.

    A trivial ``ls`` is attempted through ``self.run_cmd`` once per
    second until it succeeds or ``CONF.node_boot_timeout`` seconds
    have elapsed.  Only the command probe is used: an open port 22
    does not guarantee that SSH is ready to run commands, so the
    former socket-level check is not performed here.

    :raises exception.NodeSSHTimeout: if the command could not be run
        before the timeout expired.
    """
    LOG.info("Waiting {0} node to boot".format(
        self.name))
    timeout = CONF.node_boot_timeout
    end = time() + timeout
    while time() < end:
        try:
            self.run_cmd('ls')  # dummy cmd to check connection
        except (paramiko.ssh_exception.NoValidConnectionsError,
                paramiko.ssh_exception.SSHException) as err:
            # Expected while the node is still booting: record the
            # reason for troubleshooting, then retry after a pause.
            LOG.debug("SSH is not available on node %s yet: %s",
                      self.name, err)
            sleep(1)
        else:
            return
    raise exception.NodeSSHTimeout(timeout=timeout,
                                   node_name=self.name)
def wait_for_callback(self):

View File

@ -18,15 +18,11 @@ import os
import logging
import shutil
import random
import socket
from subprocess import check_output
from time import time
from time import sleep
from oslo_config import cfg
from ramdisk_func_test import conf
from ramdisk_func_test import exception
CONF = conf.CONF
@ -77,31 +73,6 @@ def get_random_mac():
return "52:54:00:%02x:%02x:%02x" % (rnd(), rnd(), rnd())
def wait_net_service(ip, port, timeout, try_interval=2):
    """Wait for a network service to start accepting TCP connections.

    :param ip: address of the host to connect to.
    :param port: TCP port to connect to.
    :param timeout: overall number of seconds to keep trying.
    :param try_interval: per-attempt connect timeout in seconds; also
        the pause before retrying after an immediate failure.
    :raises exception.NetServiceStartTimeout: if no connection could be
        established before ``timeout`` expired.
    """
    LOG.info("Waiting for IP {0} port {1} to start".format(ip, port))
    end = time() + timeout
    while time() < end:
        try:
            # Open a fresh socket for every attempt: a socket whose
            # connect() has failed cannot be reliably reused, and a
            # single shared socket would be leaked on the timeout path.
            s = socket.create_connection((ip, port), timeout=try_interval)
        except socket.timeout:
            # cannot connect after timeout
            continue
        except socket.error:
            # cannot connect immediately (e.g. no route)
            # wait timeout before next try
            sleep(try_interval)
            continue
        # success!
        s.close()
        return
    raise exception.NetServiceStartTimeout(timeout=timeout, ip=ip, port=port)
class FakeGlobalSectionHead(object):
def __init__(self, fp):
self.fp = fp