Merge "Make provision to evacuate all instances"
This commit is contained in:
commit
778cac30cd
|
@ -19,6 +19,7 @@ from eventlet import timeout as etimeout
|
|||
|
||||
from oslo_log import log as logging
|
||||
from oslo_service import loopingcall
|
||||
from oslo_utils import excutils
|
||||
from oslo_utils import strutils
|
||||
import taskflow.engines
|
||||
from taskflow.patterns import linear_flow
|
||||
|
@ -37,6 +38,9 @@ LOG = logging.getLogger(__name__)
|
|||
|
||||
ACTION = 'instance:evacuate'
|
||||
|
||||
# Instance power_state
|
||||
SHUTDOWN = 4
|
||||
|
||||
|
||||
class DisableComputeServiceTask(base.MasakariTask):
|
||||
def __init__(self, novaclient):
|
||||
|
@ -95,72 +99,119 @@ class EvacuateInstancesTask(base.MasakariTask):
|
|||
requires=requires)
|
||||
self.novaclient = novaclient
|
||||
|
||||
def _get_state_and_host_of_instance(self, context, instance):
|
||||
new_instance = self.novaclient.get_server(context, instance.id)
|
||||
instance_host = getattr(new_instance,
|
||||
"OS-EXT-SRV-ATTR:hypervisor_hostname")
|
||||
old_vm_state = getattr(instance, "OS-EXT-STS:vm_state")
|
||||
new_vm_state = getattr(new_instance, "OS-EXT-STS:vm_state")
|
||||
|
||||
return (old_vm_state, new_vm_state, instance_host)
|
||||
|
||||
def _stop_after_evacuation(self, context, instance):
|
||||
def _wait_for_stop_confirmation():
|
||||
old_vm_state, new_vm_state, _ = (
|
||||
self._get_state_and_host_of_instance(context, instance))
|
||||
|
||||
if new_vm_state == 'stopped':
|
||||
raise loopingcall.LoopingCallDone()
|
||||
|
||||
periodic_call_stopped = loopingcall.FixedIntervalLoopingCall(
|
||||
_wait_for_stop_confirmation)
|
||||
|
||||
try:
|
||||
self.novaclient.stop_server(context, instance.id)
|
||||
# confirm instance is stopped after recovery
|
||||
periodic_call_stopped.start(interval=CONF.verify_interval)
|
||||
etimeout.with_timeout(
|
||||
CONF.wait_period_after_power_off,
|
||||
periodic_call_stopped.wait)
|
||||
except etimeout.Timeout:
|
||||
with excutils.save_and_reraise_exception():
|
||||
msg = ("Instance '%s' is successfully evacuated but "
|
||||
"failed to stop.")
|
||||
LOG.warning(msg, instance.id)
|
||||
finally:
|
||||
periodic_call_stopped.stop()
|
||||
|
||||
def _evacuate_and_confirm(self, context, instance, host_name,
|
||||
failed_evacuation_instances, reserved_host=None):
|
||||
vm_state = getattr(instance, "OS-EXT-STS:vm_state")
|
||||
if vm_state in ['active', 'error', 'resized', 'stopped']:
|
||||
# Before locking the instance check whether it is already locked
|
||||
# by user, if yes don't lock the instance
|
||||
instance_already_locked = self.novaclient.get_server(
|
||||
context, instance.id).locked
|
||||
|
||||
# Before locking the instance check whether it is already locked
|
||||
# by user, if yes don't lock the instance
|
||||
instance_already_locked = self.novaclient.get_server(
|
||||
context, instance.id).locked
|
||||
if not instance_already_locked:
|
||||
# lock the instance so that until evacuation and confirmation
|
||||
# is not complete, user won't be able to perform any actions
|
||||
# on the instance.
|
||||
self.novaclient.lock_server(context, instance.id)
|
||||
|
||||
if not instance_already_locked:
|
||||
# lock the instance so that until evacuation and confirmation
|
||||
# is not complete, user won't be able to perform any actions
|
||||
# on the instance.
|
||||
self.novaclient.lock_server(context, instance.id)
|
||||
def _wait_for_evacuation_confirmation():
|
||||
old_vm_state, new_vm_state, instance_host = (
|
||||
self._get_state_and_host_of_instance(context, instance))
|
||||
|
||||
def _wait_for_evacuation():
|
||||
new_instance = self.novaclient.get_server(context, instance.id)
|
||||
instance_host = getattr(new_instance,
|
||||
"OS-EXT-SRV-ATTR:hypervisor_hostname")
|
||||
old_vm_state = getattr(instance, "OS-EXT-STS:vm_state")
|
||||
new_vm_state = getattr(new_instance, "OS-EXT-STS:vm_state")
|
||||
if instance_host != host_name:
|
||||
if ((old_vm_state == 'error' and
|
||||
new_vm_state == 'active') or
|
||||
old_vm_state == new_vm_state):
|
||||
raise loopingcall.LoopingCallDone()
|
||||
|
||||
if instance_host != host_name:
|
||||
if ((old_vm_state == 'error' and
|
||||
new_vm_state == 'active') or
|
||||
old_vm_state == new_vm_state):
|
||||
raise loopingcall.LoopingCallDone()
|
||||
try:
|
||||
vm_state = getattr(instance, "OS-EXT-STS:vm_state")
|
||||
|
||||
# Nova evacuates an instance only when vm_state is in active,
|
||||
# stopped or error state. If an instance is in other than active,
|
||||
# error and stopped vm_state, masakari resets the instance state
|
||||
# to *error* so that the instance can be evacuated.
|
||||
stop_instance = True
|
||||
if vm_state not in ['active', 'error', 'stopped']:
|
||||
self.novaclient.reset_instance_state(context, instance.id)
|
||||
instance = self.novaclient.get_server(context, instance.id)
|
||||
power_state = getattr(instance, "OS-EXT-STS:power_state")
|
||||
if vm_state == 'resized' and power_state != SHUTDOWN:
|
||||
stop_instance = False
|
||||
|
||||
vm_state = getattr(instance, "OS-EXT-STS:vm_state")
|
||||
|
||||
# evacuate the instance
|
||||
self.novaclient.evacuate_instance(
|
||||
context, instance.id,
|
||||
target=reserved_host.name if reserved_host else None)
|
||||
|
||||
periodic_call = loopingcall.FixedIntervalLoopingCall(
|
||||
_wait_for_evacuation_confirmation)
|
||||
|
||||
try:
|
||||
# Nova evacuates an instance only when vm_state is in active,
|
||||
# stopped or error state. If an instance is in resized
|
||||
# vm_state, masakari resets the instance state to *error* so
|
||||
# that the instance can be evacuated.
|
||||
if vm_state == 'resized':
|
||||
self.novaclient.reset_instance_state(context, instance.id)
|
||||
# add a timeout to the periodic call.
|
||||
periodic_call.start(interval=CONF.verify_interval)
|
||||
etimeout.with_timeout(
|
||||
CONF.wait_period_after_evacuation,
|
||||
periodic_call.wait)
|
||||
|
||||
# evacuate the instance
|
||||
self.novaclient.evacuate_instance(
|
||||
context, instance.id,
|
||||
target=reserved_host.name if reserved_host else None)
|
||||
|
||||
periodic_call = loopingcall.FixedIntervalLoopingCall(
|
||||
_wait_for_evacuation)
|
||||
|
||||
try:
|
||||
# add a timeout to the periodic call.
|
||||
periodic_call.start(interval=CONF.verify_interval)
|
||||
etimeout.with_timeout(
|
||||
CONF.wait_period_after_evacuation,
|
||||
periodic_call.wait)
|
||||
except etimeout.Timeout:
|
||||
# Instance is not evacuated in the expected time_limit.
|
||||
failed_evacuation_instances.append(instance.id)
|
||||
finally:
|
||||
# stop the periodic call, in case of exceptions or
|
||||
# Timeout.
|
||||
periodic_call.stop()
|
||||
except Exception:
|
||||
# Exception is raised while resetting instance state or
|
||||
# evacuating the instance itself.
|
||||
if vm_state not in ['active', 'stopped']:
|
||||
if stop_instance:
|
||||
self._stop_after_evacuation(context, instance)
|
||||
# If the instance was in 'error' state before failure
|
||||
# it should be set to 'error' after recovery.
|
||||
if vm_state == 'error':
|
||||
self.novaclient.reset_instance_state(
|
||||
context, instance.id)
|
||||
except etimeout.Timeout:
|
||||
# Instance is not evacuated in the expected time_limit.
|
||||
failed_evacuation_instances.append(instance.id)
|
||||
finally:
|
||||
if not instance_already_locked:
|
||||
# Unlock the server after evacuation and confirmation
|
||||
self.novaclient.unlock_server(context, instance.id)
|
||||
# stop the periodic call, in case of exceptions or
|
||||
# Timeout.
|
||||
periodic_call.stop()
|
||||
except Exception:
|
||||
# Exception is raised while resetting instance state or
|
||||
# evacuating the instance itself.
|
||||
failed_evacuation_instances.append(instance.id)
|
||||
finally:
|
||||
if not instance_already_locked:
|
||||
# Unlock the server after evacuation and confirmation
|
||||
self.novaclient.unlock_server(context, instance.id)
|
||||
|
||||
def execute(self, context, host_name, instance_list, reserved_host=None):
|
||||
def _do_evacuate(context, host_name, instance_list,
|
||||
|
|
|
@ -18,6 +18,7 @@ Unit Tests for host failure TaskFlow
|
|||
"""
|
||||
import copy
|
||||
|
||||
import ddt
|
||||
import mock
|
||||
|
||||
from masakari.compute import nova
|
||||
|
@ -32,6 +33,7 @@ from masakari.tests.unit import fakes
|
|||
CONF = conf.CONF
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
@mock.patch.object(nova.API, "enable_disable_service")
|
||||
@mock.patch.object(nova.API, "lock_server")
|
||||
@mock.patch.object(nova.API, "unlock_server")
|
||||
|
@ -55,8 +57,8 @@ class HostFailureTestCase(test.TestCase):
|
|||
for server in self.novaclient.get_servers(self.ctxt,
|
||||
self.instance_host):
|
||||
instance = self.novaclient.get_server(self.ctxt, server.id)
|
||||
self.assertEqual('active',
|
||||
getattr(instance, 'OS-EXT-STS:vm_state'))
|
||||
self.assertIn(getattr(instance, 'OS-EXT-STS:vm_state'),
|
||||
['active', 'stopped', 'error'])
|
||||
self.assertNotEqual(self.instance_host,
|
||||
getattr(instance,
|
||||
'OS-EXT-SRV-ATTR:hypervisor_hostname'))
|
||||
|
@ -155,22 +157,47 @@ class HostFailureTestCase(test.TestCase):
|
|||
self.assertIn(reserved_host.name,
|
||||
self.fake_client.aggregates.get('1').hosts)
|
||||
|
||||
@ddt.data('active', 'rescued', 'paused', 'shelved', 'suspended',
|
||||
'error', 'stopped', 'resized')
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
def test_evacuate_instances_task(self, _mock_novaclient, mock_unlock,
|
||||
mock_lock, mock_enable_disable):
|
||||
def test_host_failure_flow_all_instances(
|
||||
self, vm_state, _mock_novaclient, mock_unlock, mock_lock,
|
||||
mock_enable_disable):
|
||||
_mock_novaclient.return_value = self.fake_client
|
||||
|
||||
# create test data
|
||||
# create ha_enabled test data
|
||||
power_state = 4 if vm_state == 'resized' else None
|
||||
self.fake_client.servers.create(id="1", host=self.instance_host,
|
||||
vm_state="error", ha_enabled=True)
|
||||
vm_state=vm_state,
|
||||
power_state=power_state,
|
||||
ha_enabled=True)
|
||||
self.fake_client.servers.create(id="2", host=self.instance_host,
|
||||
vm_state="error", ha_enabled=True)
|
||||
vm_state=vm_state,
|
||||
power_state=power_state,
|
||||
ha_enabled=True)
|
||||
instance_list = {
|
||||
"instance_list": self.fake_client.servers.list()
|
||||
}
|
||||
|
||||
# execute DisableComputeServiceTask
|
||||
self._test_disable_compute_service(mock_enable_disable)
|
||||
# execute EvacuateInstancesTask
|
||||
self._evacuate_instances(instance_list, mock_enable_disable)
|
||||
|
||||
# execute PrepareHAEnabledInstancesTask
|
||||
instance_list = self._test_instance_list()
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
def test_host_failure_flow_all_instances_active_resized_instance(
|
||||
self, _mock_novaclient, mock_unlock, mock_lock,
|
||||
mock_enable_disable):
|
||||
_mock_novaclient.return_value = self.fake_client
|
||||
|
||||
# create ha_enabled test data
|
||||
self.fake_client.servers.create(id="1", host=self.instance_host,
|
||||
vm_state='resized',
|
||||
ha_enabled=True)
|
||||
self.fake_client.servers.create(id="2", host=self.instance_host,
|
||||
vm_state='resized',
|
||||
ha_enabled=True)
|
||||
instance_list = {
|
||||
"instance_list": self.fake_client.servers.list()
|
||||
}
|
||||
|
||||
# execute EvacuateInstancesTask
|
||||
self._evacuate_instances(instance_list, mock_enable_disable)
|
||||
|
@ -219,66 +246,6 @@ class HostFailureTestCase(test.TestCase):
|
|||
exception.HostRecoveryFailureException,
|
||||
self._evacuate_instances, instance_list, mock_enable_disable)
|
||||
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
def test_host_failure_flow_resized_instance(
|
||||
self, _mock_novaclient, mock_unlock, mock_lock,
|
||||
mock_enable_disable):
|
||||
_mock_novaclient.return_value = self.fake_client
|
||||
|
||||
# create ha_enabled test data
|
||||
self.fake_client.servers.create(id="1", host=self.instance_host,
|
||||
vm_state="resized",
|
||||
ha_enabled=True)
|
||||
self.fake_client.servers.create(id="2", host=self.instance_host,
|
||||
vm_state="resized",
|
||||
ha_enabled=True)
|
||||
instance_list = {
|
||||
"instance_list": self.fake_client.servers.list()
|
||||
}
|
||||
|
||||
# execute EvacuateInstancesTask
|
||||
self._evacuate_instances(instance_list, mock_enable_disable)
|
||||
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
def test_host_failure_flow_shutdown_instance(
|
||||
self, _mock_novaclient, mock_unlock, mock_lock,
|
||||
mock_enable_disable):
|
||||
_mock_novaclient.return_value = self.fake_client
|
||||
|
||||
# create ha_enabled test data
|
||||
self.fake_client.servers.create(id="1", host=self.instance_host,
|
||||
vm_state="stopped",
|
||||
ha_enabled=True)
|
||||
self.fake_client.servers.create(id="2", host=self.instance_host,
|
||||
vm_state="stopped",
|
||||
ha_enabled=True)
|
||||
instance_list = {
|
||||
"instance_list": self.fake_client.servers.list()
|
||||
}
|
||||
|
||||
# execute EvacuateInstancesTask
|
||||
self._evacuate_instances(instance_list, mock_enable_disable)
|
||||
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
def test_host_failure_flow_instance_in_error(
|
||||
self, _mock_novaclient, mock_unlock, mock_lock,
|
||||
mock_enable_disable):
|
||||
_mock_novaclient.return_value = self.fake_client
|
||||
|
||||
# create ha_enabled test data
|
||||
self.fake_client.servers.create(id="1", host=self.instance_host,
|
||||
vm_state="error",
|
||||
ha_enabled=True)
|
||||
self.fake_client.servers.create(id="2", host=self.instance_host,
|
||||
vm_state="error",
|
||||
ha_enabled=True)
|
||||
instance_list = {
|
||||
"instance_list": self.fake_client.servers.list()
|
||||
}
|
||||
|
||||
# execute EvacuateInstancesTask
|
||||
self._evacuate_instances(instance_list, mock_enable_disable)
|
||||
|
||||
@mock.patch('masakari.compute.nova.novaclient')
|
||||
def test_host_failure_flow_no_instances_on_host(
|
||||
self, _mock_novaclient, mock_unlock, mock_lock,
|
||||
|
|
|
@ -24,12 +24,13 @@ NOW = timeutils.utcnow().replace(microsecond=0)
|
|||
class FakeNovaClient(object):
|
||||
class Server(object):
|
||||
def __init__(self, id=None, uuid=None, host=None, vm_state=None,
|
||||
ha_enabled=None, locked=False):
|
||||
power_state=1, ha_enabled=None, locked=False):
|
||||
self.id = id
|
||||
self.uuid = uuid or uuidutils.generate_uuid()
|
||||
self.host = host
|
||||
setattr(self, 'OS-EXT-SRV-ATTR:hypervisor_hostname', host)
|
||||
setattr(self, 'OS-EXT-STS:vm_state', vm_state)
|
||||
setattr(self, 'OS-EXT-STS:power_state', power_state)
|
||||
self.metadata = {"HA_Enabled": ha_enabled}
|
||||
self.locked = locked
|
||||
|
||||
|
@ -38,9 +39,10 @@ class FakeNovaClient(object):
|
|||
self._servers = []
|
||||
|
||||
def create(self, id, uuid=None, host=None, vm_state='active',
|
||||
ha_enabled=False):
|
||||
power_state=1, ha_enabled=False):
|
||||
server = FakeNovaClient.Server(id=id, uuid=uuid, host=host,
|
||||
vm_state=vm_state,
|
||||
power_state=power_state,
|
||||
ha_enabled=ha_enabled)
|
||||
self._servers.append(server)
|
||||
return server
|
||||
|
@ -69,9 +71,12 @@ class FakeNovaClient(object):
|
|||
if not host:
|
||||
host = 'fake-host-1'
|
||||
server = self.get(uuid)
|
||||
# pretending that instance is evacuated successfully on given host
|
||||
setattr(server, 'OS-EXT-SRV-ATTR:hypervisor_hostname', host)
|
||||
setattr(server, 'OS-EXT-STS:vm_state', 'active')
|
||||
# pretending that instance is evacuated successfully on given host
|
||||
if getattr(server, "OS-EXT-STS:vm_state") == 'active':
|
||||
setattr(server, 'OS-EXT-STS:vm_state', 'active')
|
||||
else:
|
||||
setattr(server, 'OS-EXT-STS:vm_state', 'stopped')
|
||||
|
||||
def stop(self, id):
|
||||
server = self.get(id)
|
||||
|
|
Loading…
Reference in New Issue