Merge "Make provision to evacuate all instances"

This commit is contained in:
Jenkins 2017-08-14 05:19:24 +00:00 committed by Gerrit Code Review
commit 778cac30cd
3 changed files with 153 additions and 130 deletions

View File

@ -19,6 +19,7 @@ from eventlet import timeout as etimeout
from oslo_log import log as logging
from oslo_service import loopingcall
from oslo_utils import excutils
from oslo_utils import strutils
import taskflow.engines
from taskflow.patterns import linear_flow
@ -37,6 +38,9 @@ LOG = logging.getLogger(__name__)
ACTION = 'instance:evacuate'
# Instance power_state
SHUTDOWN = 4
class DisableComputeServiceTask(base.MasakariTask):
def __init__(self, novaclient):
@ -95,72 +99,119 @@ class EvacuateInstancesTask(base.MasakariTask):
requires=requires)
self.novaclient = novaclient
def _get_state_and_host_of_instance(self, context, instance):
new_instance = self.novaclient.get_server(context, instance.id)
instance_host = getattr(new_instance,
"OS-EXT-SRV-ATTR:hypervisor_hostname")
old_vm_state = getattr(instance, "OS-EXT-STS:vm_state")
new_vm_state = getattr(new_instance, "OS-EXT-STS:vm_state")
return (old_vm_state, new_vm_state, instance_host)
def _stop_after_evacuation(self, context, instance):
def _wait_for_stop_confirmation():
old_vm_state, new_vm_state, _ = (
self._get_state_and_host_of_instance(context, instance))
if new_vm_state == 'stopped':
raise loopingcall.LoopingCallDone()
periodic_call_stopped = loopingcall.FixedIntervalLoopingCall(
_wait_for_stop_confirmation)
try:
self.novaclient.stop_server(context, instance.id)
# confirm instance is stopped after recovery
periodic_call_stopped.start(interval=CONF.verify_interval)
etimeout.with_timeout(
CONF.wait_period_after_power_off,
periodic_call_stopped.wait)
except etimeout.Timeout:
with excutils.save_and_reraise_exception():
msg = ("Instance '%s' is successfully evacuated but "
"failed to stop.")
LOG.warning(msg, instance.id)
finally:
periodic_call_stopped.stop()
def _evacuate_and_confirm(self, context, instance, host_name,
failed_evacuation_instances, reserved_host=None):
vm_state = getattr(instance, "OS-EXT-STS:vm_state")
if vm_state in ['active', 'error', 'resized', 'stopped']:
# Before locking the instance check whether it is already locked
# by user, if yes don't lock the instance
instance_already_locked = self.novaclient.get_server(
context, instance.id).locked
# Before locking the instance check whether it is already locked
# by user, if yes don't lock the instance
instance_already_locked = self.novaclient.get_server(
context, instance.id).locked
if not instance_already_locked:
# lock the instance so that until evacuation and confirmation
# is not complete, user won't be able to perform any actions
# on the instance.
self.novaclient.lock_server(context, instance.id)
if not instance_already_locked:
# lock the instance so that until evacuation and confirmation
# is not complete, user won't be able to perform any actions
# on the instance.
self.novaclient.lock_server(context, instance.id)
def _wait_for_evacuation_confirmation():
old_vm_state, new_vm_state, instance_host = (
self._get_state_and_host_of_instance(context, instance))
def _wait_for_evacuation():
new_instance = self.novaclient.get_server(context, instance.id)
instance_host = getattr(new_instance,
"OS-EXT-SRV-ATTR:hypervisor_hostname")
old_vm_state = getattr(instance, "OS-EXT-STS:vm_state")
new_vm_state = getattr(new_instance, "OS-EXT-STS:vm_state")
if instance_host != host_name:
if ((old_vm_state == 'error' and
new_vm_state == 'active') or
old_vm_state == new_vm_state):
raise loopingcall.LoopingCallDone()
if instance_host != host_name:
if ((old_vm_state == 'error' and
new_vm_state == 'active') or
old_vm_state == new_vm_state):
raise loopingcall.LoopingCallDone()
try:
vm_state = getattr(instance, "OS-EXT-STS:vm_state")
# Nova evacuates an instance only when vm_state is in active,
# stopped or error state. If an instance is in other than active,
# error and stopped vm_state, masakari resets the instance state
# to *error* so that the instance can be evacuated.
stop_instance = True
if vm_state not in ['active', 'error', 'stopped']:
self.novaclient.reset_instance_state(context, instance.id)
instance = self.novaclient.get_server(context, instance.id)
power_state = getattr(instance, "OS-EXT-STS:power_state")
if vm_state == 'resized' and power_state != SHUTDOWN:
stop_instance = False
vm_state = getattr(instance, "OS-EXT-STS:vm_state")
# evacuate the instance
self.novaclient.evacuate_instance(
context, instance.id,
target=reserved_host.name if reserved_host else None)
periodic_call = loopingcall.FixedIntervalLoopingCall(
_wait_for_evacuation_confirmation)
try:
# Nova evacuates an instance only when vm_state is in active,
# stopped or error state. If an instance is in resized
# vm_state, masakari resets the instance state to *error* so
# that the instance can be evacuated.
if vm_state == 'resized':
self.novaclient.reset_instance_state(context, instance.id)
# add a timeout to the periodic call.
periodic_call.start(interval=CONF.verify_interval)
etimeout.with_timeout(
CONF.wait_period_after_evacuation,
periodic_call.wait)
# evacuate the instance
self.novaclient.evacuate_instance(
context, instance.id,
target=reserved_host.name if reserved_host else None)
periodic_call = loopingcall.FixedIntervalLoopingCall(
_wait_for_evacuation)
try:
# add a timeout to the periodic call.
periodic_call.start(interval=CONF.verify_interval)
etimeout.with_timeout(
CONF.wait_period_after_evacuation,
periodic_call.wait)
except etimeout.Timeout:
# Instance is not evacuated in the expected time_limit.
failed_evacuation_instances.append(instance.id)
finally:
# stop the periodic call, in case of exceptions or
# Timeout.
periodic_call.stop()
except Exception:
# Exception is raised while resetting instance state or
# evacuating the instance itself.
if vm_state not in ['active', 'stopped']:
if stop_instance:
self._stop_after_evacuation(context, instance)
# If the instance was in 'error' state before failure
# it should be set to 'error' after recovery.
if vm_state == 'error':
self.novaclient.reset_instance_state(
context, instance.id)
except etimeout.Timeout:
# Instance is not evacuated in the expected time_limit.
failed_evacuation_instances.append(instance.id)
finally:
if not instance_already_locked:
# Unlock the server after evacuation and confirmation
self.novaclient.unlock_server(context, instance.id)
# stop the periodic call, in case of exceptions or
# Timeout.
periodic_call.stop()
except Exception:
# Exception is raised while resetting instance state or
# evacuating the instance itself.
failed_evacuation_instances.append(instance.id)
finally:
if not instance_already_locked:
# Unlock the server after evacuation and confirmation
self.novaclient.unlock_server(context, instance.id)
def execute(self, context, host_name, instance_list, reserved_host=None):
def _do_evacuate(context, host_name, instance_list,

View File

@ -18,6 +18,7 @@ Unit Tests for host failure TaskFlow
"""
import copy
import ddt
import mock
from masakari.compute import nova
@ -32,6 +33,7 @@ from masakari.tests.unit import fakes
CONF = conf.CONF
@ddt.ddt
@mock.patch.object(nova.API, "enable_disable_service")
@mock.patch.object(nova.API, "lock_server")
@mock.patch.object(nova.API, "unlock_server")
@ -55,8 +57,8 @@ class HostFailureTestCase(test.TestCase):
for server in self.novaclient.get_servers(self.ctxt,
self.instance_host):
instance = self.novaclient.get_server(self.ctxt, server.id)
self.assertEqual('active',
getattr(instance, 'OS-EXT-STS:vm_state'))
self.assertIn(getattr(instance, 'OS-EXT-STS:vm_state'),
['active', 'stopped', 'error'])
self.assertNotEqual(self.instance_host,
getattr(instance,
'OS-EXT-SRV-ATTR:hypervisor_hostname'))
@ -155,22 +157,47 @@ class HostFailureTestCase(test.TestCase):
self.assertIn(reserved_host.name,
self.fake_client.aggregates.get('1').hosts)
@ddt.data('active', 'rescued', 'paused', 'shelved', 'suspended',
'error', 'stopped', 'resized')
@mock.patch('masakari.compute.nova.novaclient')
def test_evacuate_instances_task(self, _mock_novaclient, mock_unlock,
mock_lock, mock_enable_disable):
def test_host_failure_flow_all_instances(
self, vm_state, _mock_novaclient, mock_unlock, mock_lock,
mock_enable_disable):
_mock_novaclient.return_value = self.fake_client
# create test data
# create ha_enabled test data
power_state = 4 if vm_state == 'resized' else None
self.fake_client.servers.create(id="1", host=self.instance_host,
vm_state="error", ha_enabled=True)
vm_state=vm_state,
power_state=power_state,
ha_enabled=True)
self.fake_client.servers.create(id="2", host=self.instance_host,
vm_state="error", ha_enabled=True)
vm_state=vm_state,
power_state=power_state,
ha_enabled=True)
instance_list = {
"instance_list": self.fake_client.servers.list()
}
# execute DisableComputeServiceTask
self._test_disable_compute_service(mock_enable_disable)
# execute EvacuateInstancesTask
self._evacuate_instances(instance_list, mock_enable_disable)
# execute PrepareHAEnabledInstancesTask
instance_list = self._test_instance_list()
@mock.patch('masakari.compute.nova.novaclient')
def test_host_failure_flow_all_instances_active_resized_instance(
self, _mock_novaclient, mock_unlock, mock_lock,
mock_enable_disable):
_mock_novaclient.return_value = self.fake_client
# create ha_enabled test data
self.fake_client.servers.create(id="1", host=self.instance_host,
vm_state='resized',
ha_enabled=True)
self.fake_client.servers.create(id="2", host=self.instance_host,
vm_state='resized',
ha_enabled=True)
instance_list = {
"instance_list": self.fake_client.servers.list()
}
# execute EvacuateInstancesTask
self._evacuate_instances(instance_list, mock_enable_disable)
@ -219,66 +246,6 @@ class HostFailureTestCase(test.TestCase):
exception.HostRecoveryFailureException,
self._evacuate_instances, instance_list, mock_enable_disable)
@mock.patch('masakari.compute.nova.novaclient')
def test_host_failure_flow_resized_instance(
self, _mock_novaclient, mock_unlock, mock_lock,
mock_enable_disable):
_mock_novaclient.return_value = self.fake_client
# create ha_enabled test data
self.fake_client.servers.create(id="1", host=self.instance_host,
vm_state="resized",
ha_enabled=True)
self.fake_client.servers.create(id="2", host=self.instance_host,
vm_state="resized",
ha_enabled=True)
instance_list = {
"instance_list": self.fake_client.servers.list()
}
# execute EvacuateInstancesTask
self._evacuate_instances(instance_list, mock_enable_disable)
@mock.patch('masakari.compute.nova.novaclient')
def test_host_failure_flow_shutdown_instance(
self, _mock_novaclient, mock_unlock, mock_lock,
mock_enable_disable):
_mock_novaclient.return_value = self.fake_client
# create ha_enabled test data
self.fake_client.servers.create(id="1", host=self.instance_host,
vm_state="stopped",
ha_enabled=True)
self.fake_client.servers.create(id="2", host=self.instance_host,
vm_state="stopped",
ha_enabled=True)
instance_list = {
"instance_list": self.fake_client.servers.list()
}
# execute EvacuateInstancesTask
self._evacuate_instances(instance_list, mock_enable_disable)
@mock.patch('masakari.compute.nova.novaclient')
def test_host_failure_flow_instance_in_error(
self, _mock_novaclient, mock_unlock, mock_lock,
mock_enable_disable):
_mock_novaclient.return_value = self.fake_client
# create ha_enabled test data
self.fake_client.servers.create(id="1", host=self.instance_host,
vm_state="error",
ha_enabled=True)
self.fake_client.servers.create(id="2", host=self.instance_host,
vm_state="error",
ha_enabled=True)
instance_list = {
"instance_list": self.fake_client.servers.list()
}
# execute EvacuateInstancesTask
self._evacuate_instances(instance_list, mock_enable_disable)
@mock.patch('masakari.compute.nova.novaclient')
def test_host_failure_flow_no_instances_on_host(
self, _mock_novaclient, mock_unlock, mock_lock,

View File

@ -24,12 +24,13 @@ NOW = timeutils.utcnow().replace(microsecond=0)
class FakeNovaClient(object):
class Server(object):
def __init__(self, id=None, uuid=None, host=None, vm_state=None,
ha_enabled=None, locked=False):
power_state=1, ha_enabled=None, locked=False):
self.id = id
self.uuid = uuid or uuidutils.generate_uuid()
self.host = host
setattr(self, 'OS-EXT-SRV-ATTR:hypervisor_hostname', host)
setattr(self, 'OS-EXT-STS:vm_state', vm_state)
setattr(self, 'OS-EXT-STS:power_state', power_state)
self.metadata = {"HA_Enabled": ha_enabled}
self.locked = locked
@ -38,9 +39,10 @@ class FakeNovaClient(object):
self._servers = []
def create(self, id, uuid=None, host=None, vm_state='active',
ha_enabled=False):
power_state=1, ha_enabled=False):
server = FakeNovaClient.Server(id=id, uuid=uuid, host=host,
vm_state=vm_state,
power_state=power_state,
ha_enabled=ha_enabled)
self._servers.append(server)
return server
@ -69,9 +71,12 @@ class FakeNovaClient(object):
if not host:
host = 'fake-host-1'
server = self.get(uuid)
# pretending that instance is evacuated successfully on given host
setattr(server, 'OS-EXT-SRV-ATTR:hypervisor_hostname', host)
setattr(server, 'OS-EXT-STS:vm_state', 'active')
# pretending that instance is evacuated successfully on given host
if getattr(server, "OS-EXT-STS:vm_state") == 'active':
setattr(server, 'OS-EXT-STS:vm_state', 'active')
else:
setattr(server, 'OS-EXT-STS:vm_state', 'stopped')
def stop(self, id):
server = self.get(id)