Add timeout_msg values in fuelweb_test and system_test

- added waiters for node status
- removed redundant try/except/assert

Related blueprint: fuel-qa-failure-reason-grouping
Change-Id: I06ccac42856712d1f9afe64402d34aeba3c07af6
Anton Studenov 2016-05-20 14:15:50 +03:00 committed by Alexey Stepanov
parent 5a62e85194
commit ef2ace7f65
8 changed files with 145 additions and 157 deletions
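The change applies one pattern throughout: try/except TimeoutError blocks that existed only to rephrase the error are dropped, and the failure reason is passed to wait() through its timeout_msg argument instead. A minimal before/after sketch (illustrative only, not part of the commit; get_status is a hypothetical stand-in for get_nailgun_node_by_devops_node):

from devops.error import TimeoutError
from devops.helpers.helpers import wait

def wait_online_old(node, get_status):
    # Before: try/except used only to re-raise TimeoutError with a readable message.
    try:
        wait(lambda: get_status(node)['online'], timeout=60 * 6)
    except TimeoutError:
        raise TimeoutError(
            "Node {0} does not become online".format(node.name))

def wait_online_new(node, get_status):
    # After: wait() itself raises TimeoutError carrying timeout_msg,
    # so failure reasons can be grouped in test reports.
    wait(lambda: get_status(node)['online'],
         timeout=60 * 6,
         timeout_msg='Node {0!r} failed to become online'.format(node.name))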

View File

@@ -256,7 +256,8 @@ def enable_feature_group(env, group):
except (HTTPError, URLError):
return False
wait(check_api_group_enabled, interval=10, timeout=60 * 20)
wait(check_api_group_enabled, interval=10, timeout=60 * 20,
timeout_msg='Failed to enable feature group - {!r}'.format(group))
def find_backup(ip):
@@ -331,13 +332,13 @@ def check_mysql(ip, node_name):
" information_schema.GLOBAL_STATUS"
" WHERE VARIABLE_NAME"
" = 'wsrep_local_state_comment';\"")
try:
wait(lambda: ssh_manager.execute(ip, check_cmd)['exit_code'] == 0,
timeout=10 * 60)
logger.info('MySQL daemon is started on {0}'.format(node_name))
except TimeoutError:
logger.error('MySQL daemon is down on {0}'.format(node_name))
raise
wait(lambda: ssh_manager.execute(ip, check_cmd)['exit_code'] == 0,
timeout=10 * 60,
timeout_msg='MySQL daemon is down on {0}'.format(node_name))
logger.info('MySQL daemon is started on {0}'.format(node_name))
# TODO(astudenov): add timeout_msg
_wait(
lambda: assert_equal(
ssh_manager.execute(
@@ -348,7 +349,9 @@ def check_mysql(ip, node_name):
timeout=120)
try:
wait(lambda: ''.join(ssh_manager.execute(
ip, check_galera_cmd)['stdout']).rstrip() == 'Synced', timeout=600)
ip, check_galera_cmd)['stdout']).rstrip() == 'Synced', timeout=600,
timeout_msg='galera status != "Synced" on node {!r} with ip {}'
''.format(node_name, ip))
except TimeoutError:
logger.error('galera status is {0}'.format(''.join(ssh_manager.execute(
ip, check_galera_cmd)['stdout']).rstrip()))

View File

@@ -200,7 +200,9 @@ class AdminActions(BaseActions):
@logwrap
def wait_for_fuel_ready(self, timeout=300):
wait(lambda: self.is_fuel_ready, timeout=timeout)
wait(lambda: self.is_fuel_ready, timeout=timeout,
timeout_msg="Fuel services are not ready, please check the "
"output of 'fuel-utils check_all")
@logwrap
@retry()

View File

@@ -168,7 +168,9 @@ class NessusClient(object):
file_id = self.post('/scans/{0}/export'.format(scan_id),
body=export_def)['file']
wait(lambda: self.check_scan_export_status(scan_id, file_id),
interval=10, timeout=600)
interval=10, timeout=600,
timeout_msg='Nessus export scan status != "ready" for '
'scan_id={} file_id={}'.format(scan_id, file_id))
return file_id
def download_scan_result(

View File

@@ -297,7 +297,8 @@ class RallyTask(object):
cmd = ("awk 'BEGIN{{retval=1}};/^Using task:/{{print $NF; retval=0}};"
"END {{exit retval}}' {0}").format(temp_file)
wait(lambda: self.engine.run_container_command(cmd)['exit_code'] == 0,
timeout=30)
timeout=30, timeout_msg='Rally task {!r} creation timeout'
''.format(result))
result = self.engine.run_container_command(cmd)
task_uuid = ''.join(result['stdout']).strip()
assert_true(task_uuid in self.engine.list_tasks(),
@@ -421,6 +422,6 @@ class RallyBenchmarkTest(object):
'{0}'.format(self.current_task.status))
if result:
wait(lambda: self.current_task.status == 'finished',
timeout=timeout)
timeout=timeout, timeout_msg='Rally benchmark test timeout')
logger.info('Rally benchmark test is finished.')
return RallyResult(json_results=self.current_task.get_results())

View File

@@ -16,7 +16,6 @@ import re
import subprocess
import time
from devops.error import TimeoutError
from devops.helpers.helpers import _tcp_ping
from devops.helpers.helpers import _wait
from devops.helpers.helpers import wait
@@ -121,7 +120,9 @@ class EnvironmentModel(object):
time.sleep(5)
with TimeStat("wait_for_nodes_to_start_and_register_in_nailgun"):
wait(lambda: all(self.nailgun_nodes(devops_nodes)), 15, timeout)
wait(lambda: all(self.nailgun_nodes(devops_nodes)), 15, timeout,
timeout_msg='Bootstrap timeout for nodes: {}'
''.format([node.name for node in devops_nodes]))
if not skip_timesync:
self.sync_time()
@@ -287,14 +288,7 @@ class EnvironmentModel(object):
# Bug: 1455753
time.sleep(30)
for node in devops_nodes:
try:
wait(lambda:
self.fuel_web.get_nailgun_node_by_devops_node(
node)['online'], timeout=60 * 6)
except TimeoutError:
raise TimeoutError(
"Node {0} does not become online".format(node.name))
self.fuel_web.wait_nodes_get_online_state(devops_nodes, timeout=60 * 6)
return True
def revert_snapshot(self, name, skip_timesync=False,
@@ -314,6 +308,7 @@ class EnvironmentModel(object):
self.sync_time()
try:
with QuietLogger():
# TODO(astudenov): add timeout_msg
_wait(
self.fuel_web.client.get_releases,
expected=(
@@ -327,6 +322,7 @@ class EnvironmentModel(object):
logger.exception(
'Unexpected exception while tried to get releases')
if not skip_slaves_check:
# TODO(astudenov): add timeout_msg
_wait(lambda: self.check_slaves_are_ready(), timeout=60 * 6)
return True
@@ -407,7 +403,8 @@ class EnvironmentModel(object):
self.d_env.start([admin])
logger.info("Waiting for admin node to start up")
wait(lambda: admin.driver.node_active(admin), 60)
wait(lambda: admin.driver.node_active(admin), 60,
timeout_msg='Admin node startup timeout')
logger.info("Proceed with installation")
# update network parameters at boot screen
admin.send_keys(self.get_keys(admin, custom=custom,
@@ -452,7 +449,9 @@ class EnvironmentModel(object):
wait(lambda: (
self.ssh_manager.execute_on_remote(
admin_node_ip, cmd)['stdout'][0] != 'dead'), interval=10,
timeout=30)
timeout=30,
timeout_msg='Nginx service is dead after trying to enable '
'it with the command: {}'.format(cmd))
# pylint: disable=no-self-use
@update_rpm_packages
@@ -465,6 +464,7 @@ class EnvironmentModel(object):
@logwrap
def wait_for_provisioning(self,
timeout=settings.WAIT_FOR_PROVISIONING_TIMEOUT):
# TODO(astudenov): add timeout_msg
_wait(lambda: _tcp_ping(
self.d_env.nodes(
).admin.get_ip_address_by_network_name
@@ -496,14 +496,19 @@ class EnvironmentModel(object):
wait(lambda: self.ssh_manager.exists_on_remote(
self.ssh_manager.admin_ip,
'/var/lock/wait_for_external_config'),
timeout=600)
timeout=600,
timeout_msg='wait_for_external_config lock file timeout '
'while bootstrapping the Fuel master node')
check_cmd = 'pkill -0 -f wait_for_external_config'
wait(
lambda: self.ssh_manager.execute(
ip=self.ssh_manager.admin_ip,
cmd=check_cmd)['exit_code'] == 0, timeout=timeout)
cmd=check_cmd)['exit_code'] == 0,
timeout=timeout,
timeout_msg='wait_for_external_config process timeout '
'while bootstrapping the Fuel master node')
@logwrap
def kill_wait_for_external_config(self):
@@ -529,7 +534,9 @@ class EnvironmentModel(object):
ip=self.ssh_manager.admin_ip,
cmd="grep 'Fuel node deployment' '{:s}'".format(log_path)
)['exit_code'] == 0,
timeout=(float(settings.ADMIN_NODE_BOOTSTRAP_TIMEOUT))
timeout=(float(settings.ADMIN_NODE_BOOTSTRAP_TIMEOUT)),
timeout_msg='Fuel master node bootstrap timeout, '
'please check the log {}'.format(log_path)
)
result = self.ssh_manager.execute(
ip=self.ssh_manager.admin_ip,

View File

@@ -136,7 +136,9 @@ class FuelWebClient29(object):
lambda: all([run['status'] == 'finished'
for run in
self.client.get_ostf_test_run(cluster_id)]),
timeout=timeout)
timeout=timeout,
timeout_msg='OSTF tests run timeout '
'(cluster_id={})'.format(cluster_id))
return self.client.get_ostf_test_run(cluster_id)
@logwrap
@@ -161,6 +163,7 @@ class FuelWebClient29(object):
def assert_cluster_ready(self, os_conn, smiles_count,
networks_count=2, timeout=300):
logger.info('Assert cluster services are UP')
# TODO(astudenov): add timeout_msg
_wait(
lambda: self.get_cluster_status(
os_conn,
@@ -177,6 +180,7 @@ class FuelWebClient29(object):
logger.info('Waiting {0} sec. for passed OSTF HA tests.'
.format(timeout))
with QuietLogger(logging.ERROR):
# TODO(astudenov): add timeout_msg
_wait(lambda: self.run_ostf(cluster_id,
test_sets=['ha'],
should_fail=should_fail),
@@ -194,6 +198,7 @@ class FuelWebClient29(object):
logger.info('Waiting {0} sec. for passed OSTF Sanity checks.'
.format(timeout))
with QuietLogger():
# TODO(astudenov): add timeout_msg
_wait(lambda: self.run_ostf(cluster_id,
test_sets=['sanity'],
should_fail=should_fail),
@@ -398,6 +403,24 @@ class FuelWebClient29(object):
checkers.fail_deploy(not_ready_transactions)
def wait_node_is_online(self, devops_node, timeout=60 * 5):
logger.info(
'Wait for node {!r} online status'.format(devops_node.name))
wait(lambda: self.get_nailgun_node_by_devops_node(
devops_node)['online'],
timeout=timeout,
timeout_msg='Node {!r} failed to become online'
''.format(devops_node.name))
def wait_node_is_offline(self, devops_node, timeout=60 * 5):
logger.info(
'Wait for node {!r} offline status'.format(devops_node.name))
wait(lambda: not self.get_nailgun_node_by_devops_node(
devops_node)['online'],
timeout=timeout,
timeout_msg='Node {!r} failed to become offline'
''.format(devops_node.name))
@logwrap
def fqdn(self, devops_node):
logger.info('Get FQDN of a devops node %s', devops_node.name)
@@ -1270,6 +1293,14 @@ class FuelWebClient29(object):
node['mac'] == nailgun_node['mac'] and
node['status'] == 'discover', self.client.list_nodes()))
def wait_node_is_discovered(self, nailgun_node, timeout=6 * 60):
logger.info('Wait for node {!r} to become discovered'
''.format(nailgun_node['name']))
wait(lambda: self.is_node_discovered(nailgun_node),
timeout=timeout,
timeout_msg='Node {!r} failed to become discovered'
''.format(nailgun_node['name']))
@logwrap
def run_network_verify(self, cluster_id):
logger.info('Run network verification on the cluster %s', cluster_id)
@@ -1334,17 +1365,15 @@ class FuelWebClient29(object):
logger.info('Wait for task {0} seconds: {1}'.format(
timeout, pretty_log(task, indent=1)))
start = time.time()
try:
wait(
lambda: (self.client.get_task(task['id'])['status']
not in ('pending', 'running')),
interval=interval,
timeout=timeout
)
except TimeoutError:
raise TimeoutError(
"Waiting task \"{task}\" timeout {timeout} sec "
"was exceeded: ".format(task=task["name"], timeout=timeout))
wait(
lambda: (self.client.get_task(task['id'])['status']
not in ('pending', 'running')),
interval=interval,
timeout=timeout,
timeout_msg='Waiting task {0!r} timeout {1} sec '
'was exceeded'.format(task['name'], timeout))
took = time.time() - start
task = self.client.get_task(task['id'])
logger.info('Task finished. Took {0} seconds. {1}'.format(
@@ -1354,21 +1383,15 @@ class FuelWebClient29(object):
@logwrap
def task_wait_progress(self, task, timeout, interval=5, progress=None):
try:
logger.info(
'start to wait with timeout {0} '
'interval {1}'.format(timeout, interval))
wait(
lambda: self.client.get_task(
task['id'])['progress'] >= progress,
interval=interval,
timeout=timeout
)
except TimeoutError:
raise TimeoutError(
"Waiting task \"{task}\" timeout {timeout} sec "
"was exceeded: ".format(task=task["name"], timeout=timeout))
logger.info('start to wait with timeout {0} '
'interval {1}'.format(timeout, interval))
wait(
lambda: self.client.get_task(
task['id'])['progress'] >= progress,
interval=interval,
timeout=timeout,
timeout_msg='Waiting task {0!r} timeout {1} sec '
'was exceeded'.format(task["name"], timeout))
return self.client.get_task(task['id'])
# TODO(ddmitriev): this method will be replaced
@@ -1415,12 +1438,8 @@ class FuelWebClient29(object):
node_group, node_roles = self.get_node_group_and_role(node_name,
nodes_dict)
wait(lambda:
self.get_nailgun_node_by_devops_node(devops_node)['online'],
timeout=60 * 2)
self.wait_node_is_online(devops_node, timeout=60 * 2)
node = self.get_nailgun_node_by_devops_node(devops_node)
assert_true(node['online'],
'Node {0} is offline'.format(node['mac']))
if custom_names:
name = custom_names.get(node_name,
@@ -1979,33 +1998,14 @@ class FuelWebClient29(object):
remote.check_call('/sbin/shutdown -Ph now')
for node in devops_nodes:
logger.info('Wait a %s node offline status', node.name)
try:
wait(
lambda: not self.get_nailgun_node_by_devops_node(node)[
'online'], timeout=60 * 10)
except TimeoutError:
assert_false(
self.get_nailgun_node_by_devops_node(node)['online'],
'Node {0} has not become '
'offline after warm shutdown'.format(node.name))
self.wait_node_is_offline(node)
node.destroy()
def warm_start_nodes(self, devops_nodes):
logger.info('Starting nodes %s', [n.name for n in devops_nodes])
for node in devops_nodes:
node.create()
for node in devops_nodes:
try:
wait(
lambda: self.get_nailgun_node_by_devops_node(
node)['online'], timeout=60 * 10)
except TimeoutError:
assert_true(
self.get_nailgun_node_by_devops_node(node)['online'],
'Node {0} has not become online '
'after warm start'.format(node.name))
logger.debug('Node {0} became online.'.format(node.name))
self.wait_nodes_get_online_state(devops_nodes)
def warm_restart_nodes(self, devops_nodes):
logger.info('Reboot (warm restart) nodes %s',
@@ -2023,15 +2023,8 @@ class FuelWebClient29(object):
node.destroy()
for node in devops_nodes:
if wait_offline:
logger.info('Wait a %s node offline status', node.name)
try:
wait(lambda: not self.get_nailgun_node_by_devops_node(
node)['online'], timeout=60 * 10)
except TimeoutError:
assert_false(
self.get_nailgun_node_by_devops_node(node)['online'],
'Node {0} has not become offline after '
'cold restart'.format(node.name))
self.wait_node_is_offline(node)
if wait_after_destroy:
time.sleep(wait_after_destroy)
@@ -2040,15 +2033,7 @@ class FuelWebClient29(object):
node.create()
if wait_online:
for node in devops_nodes:
try:
wait(
lambda: self.get_nailgun_node_by_devops_node(
node)['online'], timeout=60 * 10)
except TimeoutError:
assert_true(
self.get_nailgun_node_by_devops_node(node)['online'],
'Node {0} has not become online'
' after cold start'.format(node.name))
self.wait_node_is_online(node)
self.environment.sync_time()
@logwrap
@@ -2168,18 +2153,7 @@ class FuelWebClient29(object):
@logwrap
def wait_nodes_get_online_state(self, nodes, timeout=4 * 60):
for node in nodes:
logger.info('Wait for %s node online status', node.name)
try:
wait(lambda:
self.get_nailgun_node_by_devops_node(node)['online'],
timeout=timeout)
except TimeoutError:
assert_true(
self.get_nailgun_node_by_devops_node(node)['online'],
'Node {0} has not become online'.format(node.name))
node = self.get_nailgun_node_by_devops_node(node)
assert_true(node['online'],
'Node {0} is online'.format(node['mac']))
self.wait_node_is_online(node, timeout=timeout)
@logwrap
def wait_mysql_galera_is_up(self, node_names, timeout=60 * 4):
@@ -2214,14 +2188,10 @@ class FuelWebClient29(object):
logger.info("Waiting for all Cinder services up.")
for node_name in node_names:
node = self.get_nailgun_node_by_name(node_name)
try:
wait(lambda: checkers.check_cinder_status(node['ip']),
timeout=300)
logger.info("All Cinder services up.")
except TimeoutError:
logger.error("Cinder services not ready.")
raise TimeoutError(
"Cinder services not ready. ")
wait(lambda: checkers.check_cinder_status(node['ip']),
timeout=300,
timeout_msg='Cinder services not ready')
logger.info("All Cinder services up.")
return True
def run_ostf_repeatably(self, cluster_id, test_name=None,
@@ -2315,7 +2285,8 @@ class FuelWebClient29(object):
"on node %s", fqdn)
ceph.restart_monitor(remote_to_mon)
wait(lambda: not ceph.is_clock_skew(remote), timeout=120)
wait(lambda: not ceph.is_clock_skew(remote), timeout=120,
timeout_msg='check ceph time skew timeout')
@logwrap
def check_ceph_status(self, cluster_id, offline_nodes=(),
@@ -2329,14 +2300,11 @@ class FuelWebClient29(object):
for node in online_ceph_nodes:
with self.environment.d_env\
.get_ssh_to_remote(node['ip']) as remote:
try:
wait(lambda: ceph.check_service_ready(remote) is True,
interval=20, timeout=600)
except TimeoutError:
error_msg = 'Ceph service is not properly started' \
' on {0}'.format(node['name'])
logger.error(error_msg)
raise TimeoutError(error_msg)
wait(lambda: ceph.check_service_ready(remote) is True,
interval=20, timeout=600,
timeout_msg='Ceph service is not properly started'
' on {0}'.format(node['name']))
logger.info('Ceph service is ready. Checking Ceph Health...')
self.check_ceph_time_skew(cluster_id, offline_nodes)
@@ -2753,7 +2721,8 @@ class FuelWebClient29(object):
for osd_id in ids:
remote_ceph.execute("ceph osd out {}".format(osd_id))
wait(lambda: ceph.is_health_ok(remote_ceph),
interval=30, timeout=10 * 60)
interval=30, timeout=10 * 60,
timeout_msg='ceph health ok timeout')
for osd_id in ids:
if OPENSTACK_RELEASE_UBUNTU in OPENSTACK_RELEASE:
remote_ceph.execute("stop ceph-osd id={}".format(osd_id))

View File

@@ -174,7 +174,8 @@ class PrepareActions(object):
device='cdrom').volume.upload(cloud_image_settings_path)
self.env.d_env.start([admin])
logger.info("Waiting for Centos node to start up")
wait(lambda: admin.driver.node_active(admin), 60)
wait(lambda: admin.driver.node_active(admin), 60,
timeout_msg='Centos node failed to start')
logger.info("Waiting for Centos node ssh ready")
self.env.wait_for_provisioning()

View File

@@ -63,7 +63,8 @@ class StrengthActions(object):
n_nodes = [node['online'] for node in n_nodes]
return n_nodes.count(False) == 0
wait(wait_offline_nodes, timeout=60 * 5)
wait(wait_offline_nodes, timeout=60 * 5,
timeout_msg='Nodes failed to become offline')
@deferred_decorator([make_snapshot_if_step_fail])
@action
@@ -284,25 +285,26 @@ class FillRootActions(object):
'resources_running'] == '0'
wait(checking_health_disk_attribute,
"Attribute #health_disk wasn't appeared "
"in attributes on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
timeout=self.pcs_check_timeout,
timeout_msg="Attribute #health_disk wasn't appeared "
"in attributes on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout))
wait(checking_for_red_in_health_disk_attribute,
"Attribute #health_disk doesn't have 'red' value "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
timeout=self.pcs_check_timeout,
timeout_msg="Attribute #health_disk doesn't have 'red' value "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout))
wait(check_stopping_resources,
"Attribute 'running_resources' doesn't have '0' value "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
timeout=self.pcs_check_timeout,
timeout_msg="Attribute 'running_resources' "
"doesn't have '0' value "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout))
@deferred_decorator([make_snapshot_if_step_fail])
@action
@@ -363,16 +365,17 @@ class FillRootActions(object):
'resources_running'] == self.slave_node_running_resources
wait(checking_health_disk_attribute_is_not_present,
"Attribute #health_disk was appeared in attributes "
"on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout),
timeout=self.pcs_check_timeout)
timeout=self.pcs_check_timeout,
timeout_msg="Attribute #health_disk was appeared "
"in attributes on node {} in {} seconds".format(
self.primary_controller_fqdn,
self.pcs_check_timeout))
wait(check_started_resources,
"Attribute 'running_resources' doesn't have {} value "
"on node {} in {} seconds".format(
self.slave_node_running_resources,
self.primary_controller_fqdn,
self.pcs_check_timeout),
self.pcs_check_timeout)
timeout=self.pcs_check_timeout,
timeout_msg="Attribute 'running_resources' "
"doesn't have {} value "
"on node {} in {} seconds".format(
self.slave_node_running_resources,
self.primary_controller_fqdn,
self.pcs_check_timeout))
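
Illustrative usage only (not part of the commit): with the new FuelWebClient29 helpers added above, tests no longer need inline wait() calls around node power operations. Here fuel_web is assumed to be a FuelWebClient29 instance and devops_node a devops node object from the test environment.

def cold_restart_single_node(fuel_web, devops_node):
    devops_node.destroy()
    # Raises TimeoutError("Node ... failed to become offline") on timeout.
    fuel_web.wait_node_is_offline(devops_node, timeout=60 * 10)

    devops_node.create()
    # Raises TimeoutError("Node ... failed to become online") on timeout.
    fuel_web.wait_node_is_online(devops_node, timeout=60 * 10)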