Adds Hyper-V Cluster scenario

Hyper-V VMs can be clustered, making them highly available.

We can force a VM failover through WinRM, causing the VM to
restart on another host. For this, the Hyper-V hosts must have
WinRM enabled.

A VM must have network connectivity after the failover,
and operations (resize, migrate, etc.) must still succeed after failover.

Adds the following config options:
- cluster_enabled (default = False)
- username
- password
- failover_timeout (default = 120 seconds)
- failover_sleep_interval (default = 5 seconds)

Adds HyperVClusterTest.
This commit is contained in:
Claudiu Belu 2017-06-26 06:59:57 -07:00
parent baa1ef318b
commit 763348b367
7 changed files with 245 additions and 0 deletions

View File

View File

@ -0,0 +1,60 @@
# Copyright 2013 Cloudbase Solutions Srl
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
from winrm import protocol
from oswin_tempest_plugin import exceptions
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Overrides the DEFAULT_TIMEOUT attribute on the Protocol class itself (not
# on a single instance). "PT3600S" reads as an ISO 8601 duration of 3600
# seconds -- NOTE(review): confirm this is the format the winrm library
# expects and that one hour is the intended limit.
protocol.Protocol.DEFAULT_TIMEOUT = "PT3600S"
def run_wsman_cmd(host, username, password, cmd, fail_on_error=False):
    """Run a command on a remote host through WSMan and return its output.

    :param host: hostname or IP address of the remote host; used to build
        the ``https://<host>:5986/wsman`` endpoint.
    :param username: username used to authenticate against the host.
    :param password: password used to authenticate against the host.
    :param cmd: the command line to run in the remote shell.
    :param fail_on_error: when True, a non-zero return code raises
        WSManException instead of being returned to the caller.
    :returns: a ``(std_out, std_err, return_code)`` tuple.
    :raises exceptions.WSManException: if ``fail_on_error`` is set and the
        command finished with a non-zero return code.
    """
    url = 'https://%s:5986/wsman' % host

    LOG.debug('Connecting to: %s', host)
    p = protocol.Protocol(endpoint=url,
                          transport='plaintext',
                          server_cert_validation='ignore',
                          username=username,
                          password=password)

    shell_id = p.open_shell()
    try:
        LOG.debug('Running command on host %(host)s: %(cmd)s',
                  {'host': host, 'cmd': cmd})
        command_id = p.run_command(shell_id, cmd)
        try:
            std_out, std_err, return_code = p.get_command_output(
                shell_id, command_id)
        finally:
            # Release the command handle even if fetching its output failed.
            p.cleanup_command(shell_id, command_id)
    finally:
        # Always close the remote shell, so a failing command does not leave
        # it open on the host.
        p.close_shell(shell_id)

    LOG.debug('Results from %(host)s: return_code: %(return_code)s, std_out: '
              '%(std_out)s, std_err: %(std_err)s',
              {'host': host, 'return_code': return_code, 'std_out': std_out,
               'std_err': std_err})

    if fail_on_error and return_code:
        raise exceptions.WSManException(
            cmd=cmd, host=host, return_code=return_code,
            std_out=std_out, std_err=std_err)

    return (std_out, std_err, return_code)
def run_wsman_ps(host, username, password, cmd, fail_on_error=False):
    """Run a PowerShell command on a remote host through WSMan.

    The given command is embedded into a non-interactive ``powershell``
    command line, which is then handed to run_wsman_cmd. The arguments and
    the returned value are the same as for run_wsman_cmd.
    """
    ps_template = ("powershell -NonInteractive -ExecutionPolicy RemoteSigned "
                   "-Command \"%s\"")
    return run_wsman_cmd(host, username, password, ps_template % cmd,
                         fail_on_error)

View File

@ -36,6 +36,21 @@ HyperVGroup = [
cfg.StrOpt('gen2_image_ref',
help="Valid Generation 2 VM VHDX image reference to be used "
"in tests."),
cfg.BoolOpt('cluster_enabled',
default=False,
help="The compute nodes are joined into a Hyper-V Cluster."),
cfg.StrOpt('username',
help="The username of the Hyper-V hosts."),
cfg.StrOpt('password',
secret=True,
help='The password of the Hyper-V hosts.'),
cfg.IntOpt('failover_timeout',
default=120,
help='The maximum amount of time to wait for a failover to '
'occur.'),
cfg.IntOpt('failover_sleep_interval',
default=5,
help='The amount of time to wait between failover checks.'),
]

View File

@ -19,3 +19,13 @@ from tempest.lib import exceptions
class ResizeException(exceptions.TempestException):
    """Raised when a server failed to resize to the requested flavor."""

    message = ("Server %(server_id)s failed to resize "
               "to the given flavor %(flavor)s")
class NotFoundException(exceptions.TempestException):
    """Raised when a resource of the given type could not be found."""

    message = ("Resource %(resource)s (%(res_type)s) "
               "was not found.")
class WSManException(exceptions.TempestException):
    """Raised when a command run through WSMan ends with a non-zero code.

    The message expects the ``cmd``, ``host``, ``return_code``, ``std_out``
    and ``std_err`` keyword arguments.
    """

    message = ('Command "%(cmd)s" failed on host %(host)s with the '
               'return code %(return_code)s. std_out: %(std_out)s, '
               'std_err: %(std_err)s')

View File

@ -0,0 +1,158 @@
# Copyright 2017 Cloudbase Solutions SRL
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import time
from oslo_log import log as logging
from tempest.lib import exceptions as lib_exc
from oswin_tempest_plugin.clients import wsman
from oswin_tempest_plugin import config
from oswin_tempest_plugin import exceptions
from oswin_tempest_plugin.tests import test_base
from oswin_tempest_plugin.tests._mixins import migrate
from oswin_tempest_plugin.tests._mixins import resize
# Shared configuration object; the code below reads its "hyperv" and
# "compute" option groups.
CONF = config.CONF
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
class HyperVClusterTest(test_base.TestBase,
                        migrate._MigrateMixin,
                        resize._ResizeMixin):
    """Scenario tests for instances running on a Hyper-V Cluster.

    Every server created by this suite is forced to fail over right after it
    has been spawned. The suite then checks that:

        * the instance moves to another host.
        * the host reported by nova for the instance is updated.
        * the instance's network connection still works.
        * different nova operations can still be performed.

    The suite relies on having at least 2 compute nodes available, joined
    into a cluster, and with WSMan configured.

    Tests defined directly in this class:

        * test_clustered_vm

    NOTE(review): any further tests are presumably inherited from the
    migrate and resize mixins -- confirm against those modules.
    """

    _BIGGER_FLAVOR = {'disk': 1}
    _BAD_FLAVOR = {'disk': -1}

    @classmethod
    def skip_checks(cls):
        """Skip the suite if the cluster options or node count are missing."""
        super(HyperVClusterTest, cls).skip_checks()

        # Every one of these hyperv options must hold a truthy value for the
        # cluster tests to be runnable.
        for required_opt in ('cluster_enabled', 'username', 'password'):
            if getattr(CONF.hyperv, required_opt):
                continue
            raise cls.skipException(
                'The config option "hyperv.%s" has not been set. '
                'Skipping.' % required_opt)

        if CONF.compute.min_compute_nodes < 2:
            raise cls.skipException('Expected at least 2 compute nodes.')

    def _failover_server(self, server_name, host_ip):
        """Triggers the failover for the given server on the given host."""
        vm_resource = "Virtual Machine %s" % server_name
        ps_cmd = "Test-ClusterResourceFailure -Name '%s'" % vm_resource

        # NOTE(claudiub): the failover command is issued twice. On the first
        # failure, the Hyper-V Cluster prefers the current node and tries to
        # reactivate the VM on it, which succeeds. Only on the 2nd failure
        # does the VM fail over to another node. A delay is needed between
        # the two commands, so the original failover has time to finish.
        wsman.run_wsman_ps(host_ip, CONF.hyperv.username,
                           CONF.hyperv.password, ps_cmd,
                           fail_on_error=True)
        time.sleep(CONF.hyperv.failover_sleep_interval)
        wsman.run_wsman_ps(host_ip, CONF.hyperv.username,
                           CONF.hyperv.password, ps_cmd,
                           fail_on_error=True)

    def _wait_for_failover(self, server, original_host):
        """Blocks until the given server is reported on another host.

        The server's host is polled every
        "CONF.hyperv.failover_sleep_interval" seconds.

        :raises TimeoutException: if the server is still on the original
            host after "CONF.hyperv.failover_timeout" seconds.
        """
        server_id = server['id']
        LOG.debug('Waiting for server %(server)s to failover from '
                  'compute node %(host)s',
                  {'server': server_id, 'host': original_host})

        timeout = CONF.hyperv.failover_timeout
        started_at = int(time.time())
        while True:
            # The elapsed time is sampled before querying nova, so the
            # timeout check below uses the value taken at loop start.
            waited = int(time.time()) - started_at
            host_now = self._get_server_as_admin(server)[
                'OS-EXT-SRV-ATTR:host']

            if host_now != original_host:
                LOG.debug('Server %(server)s failovered from compute node '
                          '%(host)s in %(seconds)s seconds.',
                          {'server': server_id, 'host': original_host,
                           'seconds': waited})
                return

            if waited >= timeout:
                raise lib_exc.TimeoutException(
                    'Server %(server)s did not failover in the given '
                    'amount of time (%(timeout)s s).' %
                    {'server': server_id, 'timeout': timeout})

            time.sleep(CONF.hyperv.failover_sleep_interval)

    def _get_hypervisor(self, hostname):
        """Returns the first hypervisor with the given hostname.

        :raises NotFoundException: if no hypervisor has that hostname.
        """
        listing = self.admin_hypervisor_client.list_hypervisors(detail=True)
        matches = [hyp for hyp in listing['hypervisors']
                   if hyp['hypervisor_hostname'] == hostname]
        if matches:
            return matches[0]

        raise exceptions.NotFoundException(resource=hostname,
                                           res_type='hypervisor')

    def _get_server_as_admin(self, server):
        """Fetches the server's details using the admin servers client."""
        # only admins have access to certain instance properties.
        response = self.admin_servers_client.show_server(server['id'])
        return response['server']

    def _create_server(self):
        """Creates a server, then forces it to fail over to another host."""
        server_tuple = super(HyperVClusterTest, self)._create_server()
        created = server_tuple.server

        details = self._get_server_as_admin(created)
        instance_name = details['OS-EXT-SRV-ATTR:instance_name']
        source_host = details['OS-EXT-SRV-ATTR:host']
        source_ip = self._get_hypervisor(source_host)['host_ip']

        self._failover_server(instance_name, source_ip)
        self._wait_for_failover(created, source_host)

        return server_tuple

    def test_clustered_vm(self):
        # The server has already failed over by the time it is returned, so
        # this checks connectivity after the failover.
        self._check_server_connectivity(self._create_server())

View File

@ -76,6 +76,7 @@ class TestBase(tempest.test.BaseTestCase):
cls.admin_servers_client = cls.os_admin.servers_client
cls.admin_flavors_client = cls.os_admin.flavors_client
cls.admin_migrations_client = cls.os_admin.migrations_client
cls.admin_hypervisor_client = cls.os_admin.hypervisor_client
# Neutron network client
cls.security_groups_client = (

View File

@ -3,3 +3,4 @@
# process, which may cause wedges in the gate later.
pbr>=2.0 # Apache-2.0
pywinrm>=0.2.2 # MIT