Merge "Add extra computes failover tests"

Jenkins 2016-03-25 17:11:49 +00:00 committed by Gerrit Code Review
commit 072adcf808
4 changed files with 209 additions and 17 deletions


@@ -13,7 +13,6 @@
# under the License.
import random
import time
from devops.error import TimeoutError
from devops.helpers import helpers
@@ -163,18 +162,25 @@ class OpenStackActions(common.Common):
"Instance do not reach active state, current state"
" is {0}".format(self.get_instance_detail(srv).status))
-    def verify_srv_deleted(self, srv):
-        try:
-            if self.get_instance_detail(srv.id):
-                logger.info("Try getting server another time.")
-                time.sleep(30)
-                if self.get_instance_detail(srv.id) in \
-                        self.nova.servers.list():
-                    return False
-        except Exception:
+    def is_srv_deleted(self, srv):
+        if srv in self.nova.servers.list():
+            logger.info("Server found in server list")
+            return False
+        else:
+            logger.info("Server was successfully deleted")
+            return True
+
+    def verify_srv_deleted(self, srv, timeout=150):
+        try:
+            server = self.get_instance_detail(srv.id)
+        except Exception:
+            logger.info("Server was successfully deleted")
+            return
+        helpers.wait(lambda: self.is_srv_deleted(server),
+                     interval=2, timeout=timeout,
+                     timeout_msg="Server wasn't deleted in "
+                                 "{0} seconds".format(timeout))

    def assign_floating_ip(self, srv, use_neutron=False):
        if use_neutron:
            # Find external net id for tenant

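Note on the new contract: verify_srv_deleted() no longer returns a boolean; it polls is_srv_deleted() through helpers.wait and raises on timeout (a TimeoutError from devops, per the timeout_msg above). A minimal usage sketch, assuming an OpenStackActions instance os_conn and a previously booted server srv:

    os_conn.delete_instance(srv)
    # No assert_true() wrapper is needed any more: a server that is
    # still listed after `timeout` seconds raises instead of
    # returning False.
    os_conn.verify_srv_deleted(srv, timeout=150)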

@@ -293,7 +293,8 @@ class EnvironmentModel(object):
"Node {0} does not become online".format(node.name))
return True
def revert_snapshot(self, name, skip_timesync=False):
def revert_snapshot(self, name, skip_timesync=False,
skip_slaves_check=False):
if not self.d_env.has_snapshot(name):
return False
@@ -314,7 +315,8 @@
        self.set_admin_keystone_password()
        self.fuel_web.get_nailgun_version()

-        _wait(lambda: self.check_slaves_are_ready(), timeout=60 * 6)
+        if not skip_slaves_check:
+            _wait(lambda: self.check_slaves_are_ready(), timeout=60 * 6)
        return True

    def set_admin_ssh_password(self):

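The new skip_slaves_check flag lets a caller substitute its own readiness check after a revert; the RH failover tests below do exactly that, presumably because RH-based computes need the SSH/provision-based check from RhBase rather than the generic one. A sketch of the call pattern, taken from those tests:

    self.env.revert_snapshot('ready_ha_one_controller_with_rh_compute',
                             skip_timesync=True, skip_slaves_check=True)
    # RhBase supplies an RH-aware replacement for the skipped check.
    self.check_slaves_are_ready()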

@@ -701,8 +701,7 @@ class VmBackedWithCephMigrationBasic(TestBasic):
logger.info("Terminate migrated server")
os.delete_instance(new_srv)
assert_true(os.verify_srv_deleted(new_srv),
"Verify server was deleted")
os.verify_srv_deleted(new_srv)
self.show_step(11)
# Check if the dhcp lease for instance still remains
@@ -788,8 +787,7 @@ class VmBackedWithCephMigrationBasic(TestBasic):
        self.show_step(18)
        logger.info("Terminate migrated server")
        os.delete_instance(new_srv)
-        assert_true(os.verify_srv_deleted(new_srv),
-                    "Verify server was deleted")
+        os.verify_srv_deleted(new_srv)

        self.env.make_snapshot(
            "vm_backed_with_ceph_live_migration")

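Dropping the assert_true() wrapper here is the necessary counterpart of the helper change above: verify_srv_deleted() now returns None on its early-exit path and raises on timeout, so wrapping it in assert_true() would fail even on success. A hedged sketch for callers that want to handle the failure themselves (assuming helpers.wait raises devops' TimeoutError on expiry, matching the import in os_actions):

    from devops.error import TimeoutError

    try:
        os.verify_srv_deleted(new_srv)
    except TimeoutError as e:
        logger.error('Server was not deleted: {0}'.format(e))
        raise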

@@ -33,6 +33,22 @@ from fuelweb_test.tests.base_test_case import TestBasic
class RhBase(TestBasic):
    """RH-based compute tests base"""

+    def check_slaves_are_ready(self):
+        devops_nodes = [node for node in self.env.d_env.nodes().slaves
+                        if node.driver.node_active(node)]
+        for node in devops_nodes:
+            ip = self.fuel_web.get_node_ip_by_devops_name(node.name)
+            try:
+                self.wait_for_slave_provision(ip)
+            except TimeoutError:
+                asserts.assert_true(
+                    tcp_ping(ip, 22),
+                    'Node {0} has not become online '
+                    'after revert'.format(node.name))
+            logger.debug('Node {0} became online.'.format(node.name))
+        return True

    @staticmethod
    def wait_for_slave_provision(node_ip, timeout=10 * 60):
        """Wait for a target node provision.
@@ -93,7 +109,7 @@ class RhBase(TestBasic):
                tcp_ping(ip, 22),
                'Node {0} has not become online '
                'after warm start'.format(node.name))
-            logger.debug('Node {0} became online.'.format(node.name))
+            logger.info('Node {0} became online.'.format(node.name))

    @staticmethod
    def connect_rh_image(slave):
@@ -888,3 +904,173 @@ class RhHAOneControllerMigration(RhBase):
            test_sets=['smoke', 'sanity'])
        self.env.make_snapshot("ready_ha_one_controller_with_rh_computes")

@test(groups=['rh', 'rh.failover_group'])
class RHFailoverGroup(RhBase):
    """Failover tests for RH-based computes"""

    @test(depends_on_groups=['deploy_rh_compute_ha_one_controller_tun'],
          groups=['check_rh_warm_reboot'])
    @log_snapshot_after_test
    def check_rh_warm_reboot(self):
        """Check that a resumed VM works properly after a warm reboot
        of an RH-based compute

        Scenario:
            1. Revert environment with RH-compute.
            2. Check that services are ready.
            3. Boot VM on compute and check its connectivity via
               floating IP.
            4. Warm reboot RH-based compute.
            5. Verify VM connectivity via floating IP after successful
               reboot and VM resume action.

        Duration 20m
        Snapshot check_rh_warm_reboot
        """
        self.show_step(1, initialize=True)
        self.env.revert_snapshot('ready_ha_one_controller_with_rh_compute',
                                 skip_timesync=True, skip_slaves_check=True)
        self.check_slaves_are_ready()
        logger.debug('All slaves online.')

        self.show_step(2)
        cluster_id = self.fuel_web.get_last_created_cluster()
        os_conn = os_actions.OpenStackActions(
            self.fuel_web.get_public_vip(cluster_id))
        self.fuel_web.assert_cluster_ready(os_conn, smiles_count=5)
        logger.debug('Cluster up and ready.')

        self.show_step(3)
        controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, roles=('controller',))
        asserts.assert_equal(len(controllers), 1,
                             'Environment does not have 1 controller node, '
                             'found {} nodes!'.format(len(controllers)))
        compute = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, ['compute'])[0]
        target_node = self.fuel_web.get_devops_node_by_nailgun_node(
            compute)
        net_label = self.fuel_web.get_cluster_predefined_networks_name(
            cluster_id)['private_net']
        vm = os_conn.create_server_for_migration(
            neutron=True, label=net_label)
        vm_floating_ip = os_conn.assign_floating_ip(vm)
        logger.info('Trying to reach VM via TCP.')
        try:
            wait(lambda: tcp_ping(vm_floating_ip.ip, 22), timeout=120)
        except TimeoutError:
            raise TimeoutError('Cannot reach instance by floating '
                               'ip {0}'.format(vm_floating_ip.ip))
        logger.info('VM is accessible via ip: {0}'.format(vm_floating_ip.ip))

        self.show_step(4)
        self.warm_restart_nodes([target_node])
        self.fuel_web.assert_cluster_ready(os_conn, smiles_count=5)
        logger.info('All cluster services up and '
                    'running after compute reboot.')

        self.show_step(5)
        asserts.assert_equal(
            os_conn.get_instance_detail(vm).status, "ACTIVE",
            "Instance did not reach ACTIVE state after the compute came "
            "back online, current state is {0}".format(
                os_conn.get_instance_detail(vm).status))
        logger.info('Spawned VM is ACTIVE. Trying to '
                    'access it via ip: {0}'.format(vm_floating_ip.ip))
        try:
            wait(lambda: tcp_ping(vm_floating_ip.ip, 22), timeout=120)
        except TimeoutError:
            raise TimeoutError('Cannot reach instance by floating '
                               'ip {0}'.format(vm_floating_ip.ip))
        logger.info('VM is accessible. Deleting it.')
        os_conn.delete_instance(vm)
        os_conn.verify_srv_deleted(vm)
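
The "wait until the floating IP answers on TCP/22" block appears four times across these two tests; a hedged refactoring sketch that RhBase could host (hypothetical helper name, not part of this change):

    def wait_vm_is_accessible(self, floating_ip, timeout=120):
        """Wait until a VM answers on TCP/22 via its floating IP."""
        try:
            wait(lambda: tcp_ping(floating_ip.ip, 22), timeout=timeout)
        except TimeoutError:
            raise TimeoutError('Cannot reach instance by floating '
                               'ip {0}'.format(floating_ip.ip))
        logger.info('VM is accessible via ip: {0}'.format(floating_ip.ip))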

    @test(depends_on_groups=['deploy_rh_compute_ha_one_controller_tun'],
          groups=['check_rh_hard_reboot'])
    @log_snapshot_after_test
    def check_rh_hard_reboot(self):
        """Check that a resumed VM works properly after a hard reboot
        of an RH-based compute

        Scenario:
            1. Revert environment with RH-compute.
            2. Check that services are ready.
            3. Boot VM on compute and check its connectivity via
               floating IP.
            4. Hard reboot RH-based compute.
            5. Verify VM connectivity via floating IP after successful
               reboot and VM resume action.

        Duration 20m
        Snapshot check_rh_hard_reboot
        """
        self.show_step(1, initialize=True)
        self.env.revert_snapshot('ready_ha_one_controller_with_rh_compute',
                                 skip_timesync=True, skip_slaves_check=True)
        self.check_slaves_are_ready()
        logger.debug('All slaves online.')

        self.show_step(2)
        cluster_id = self.fuel_web.get_last_created_cluster()
        os_conn = os_actions.OpenStackActions(
            self.fuel_web.get_public_vip(cluster_id))
        self.fuel_web.assert_cluster_ready(os_conn, smiles_count=5)
        logger.debug('Cluster up and ready.')

        self.show_step(3)
        controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, roles=('controller',))
        asserts.assert_equal(len(controllers), 1,
                             'Environment does not have 1 controller node, '
                             'found {} nodes!'.format(len(controllers)))
        compute = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, ['compute'])[0]
        target_node = self.fuel_web.get_devops_node_by_nailgun_node(
            compute)
        target_node_ip = self.fuel_web.get_node_ip_by_devops_name(
            target_node.name)
        net_label = self.fuel_web.get_cluster_predefined_networks_name(
            cluster_id)['private_net']
        vm = os_conn.create_server_for_migration(
            neutron=True, label=net_label)
        vm_floating_ip = os_conn.assign_floating_ip(vm)
        logger.info('Trying to reach VM via TCP.')
        try:
            wait(lambda: tcp_ping(vm_floating_ip.ip, 22), timeout=120)
        except TimeoutError:
            raise TimeoutError('Cannot reach instance by floating '
                               'ip {0}'.format(vm_floating_ip.ip))
        logger.info('VM is accessible via ip: {0}'.format(vm_floating_ip.ip))

        self.show_step(4)
        target_node.destroy()
        asserts.assert_false(target_node.driver.node_active(node=target_node),
                             'Target node still active')
        target_node.start()
        asserts.assert_true(target_node.driver.node_active(node=target_node),
                            'Target node did not start')
        self.wait_for_slave_provision(target_node_ip)
        self.fuel_web.assert_cluster_ready(os_conn, smiles_count=5)
        logger.info('All cluster services up and '
                    'running after compute hard reboot.')

        self.show_step(5)
        asserts.assert_equal(
            os_conn.get_instance_detail(vm).status, "ACTIVE",
            "Instance did not reach ACTIVE state after the compute came "
            "back online, current state is {0}".format(
                os_conn.get_instance_detail(vm).status))
        logger.info('Spawned VM is ACTIVE. Trying to '
                    'access it via ip: {0}'.format(vm_floating_ip.ip))
        try:
            wait(lambda: tcp_ping(vm_floating_ip.ip, 22), timeout=120)
        except TimeoutError:
            raise TimeoutError('Cannot reach instance by floating '
                               'ip {0}'.format(vm_floating_ip.ip))
        logger.info('VM is accessible. Deleting it.')
        os_conn.delete_instance(vm)
        os_conn.verify_srv_deleted(vm)
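
The destroy/start/wait sequence in step 4 is what these tests mean by a hard reboot; as a hedged sketch, it could live in RhBase next to wait_for_slave_provision (hypothetical helper name, mirroring the inline calls above):

    def hard_reboot_node(self, devops_node, node_ip, timeout=10 * 60):
        """Power-cycle a devops node and wait until provisioning answers."""
        devops_node.destroy()
        asserts.assert_false(
            devops_node.driver.node_active(node=devops_node),
            'Target node still active')
        devops_node.start()
        asserts.assert_true(
            devops_node.driver.node_active(node=devops_node),
            'Target node did not start')
        self.wait_for_slave_provision(node_ip, timeout=timeout)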