From 9f5628e55bc70bc57bcf66e456fb18af7fe47859 Mon Sep 17 00:00:00 2001
From: Lucian Petrut
Date: Tue, 9 Oct 2018 17:20:16 +0300
Subject: [PATCH] Improve clustered instance failover handling

Instances can bounce between hosts many times in a short interval,
especially when the CSVs go down (as many as 20 times in less than
2 minutes). We're not handling this properly.

The failover handling logic is prone to race conditions: multiple hosts
may attempt to claim the same instance, leaving it in an inconsistent
state.

We're introducing distributed locks, preventing races between hosts.
At the same time, we're validating the events, as the instance may have
moved again by the time we process the event.

A distributed lock backend will have to be configured.

We're also waiting for cluster groups that are in "pending" state. Such
groups may not even be registered in Hyper-V yet, so any action we take
on the VM would fail.

Closes-Bug: #1795299
Closes-Bug: #1796673
Change-Id: I3dbdcf208bb7a96bd516b41e4725a5fcb37280d6
---
 compute_hyperv/nova/cluster/clusterops.py          | 53 ++++++++++++-
 compute_hyperv/nova/cluster/driver.py              |  2 +
 compute_hyperv/nova/driver.py                      |  6 ++
 compute_hyperv/tests/test.py                       | 12 +++
 .../tests/unit/cluster/test_clusterops.py          | 77 ++++++++++++++++---
 .../tests/unit/test_coordination.py                |  2 +
 ...er-distributed-locks-5f12252af6b3913b.yaml      | 11 +++
 7 files changed, 150 insertions(+), 13 deletions(-)
 create mode 100644 releasenotes/notes/cluster-distributed-locks-5f12252af6b3913b.yaml

diff --git a/compute_hyperv/nova/cluster/clusterops.py b/compute_hyperv/nova/cluster/clusterops.py
index e7b456e0..4e5c2780 100644
--- a/compute_hyperv/nova/cluster/clusterops.py
+++ b/compute_hyperv/nova/cluster/clusterops.py
@@ -16,6 +16,7 @@
 """Management class for Cluster VM operations."""
 
 import functools
+import time
 
 from nova.compute import power_state
 from nova.compute import task_states
@@ -26,11 +27,13 @@ from nova import objects
 from nova import utils
 from nova.virt import block_device
 from nova.virt import event as virtevent
+from os_win import constants as os_win_const
 from os_win import exceptions as os_win_exc
 from os_win import utilsfactory
 from oslo_log import log as logging
 
 import compute_hyperv.nova.conf
+from compute_hyperv.nova import coordination
 from compute_hyperv.nova import hostops
 from compute_hyperv.nova import serialconsoleops
 from compute_hyperv.nova import vmops
@@ -106,6 +109,7 @@ class ClusterOps(object):
             instance.name, instance.host,
             self._this_node)
 
+    @coordination.synchronized('failover-{instance_name}')
     def _failover_migrate(self, instance_name, new_host):
         """This method will check if the generated event is a legitimate
         failover to this node. If it is, it will proceed to prepare the
@@ -126,6 +130,31 @@
                    'new_host': new_host,
                    'old_host': old_host})
 
+        # While the cluster group is in "pending" state, it may not even be
+        # registered in Hyper-V, so there's not much we can do. We'll have to
+        # wait for it to be handled by the Failover Cluster service.
+        self._wait_for_pending_instance(instance_name)
+
+        current_host = self._clustutils.get_vm_host(instance_name)
+        instance_moved_again = current_host.upper() != new_host.upper()
+        if instance_moved_again:
+            LOG.warning("While processing instance %(instance)s failover to "
+                        "%(host)s, it has moved to %(current_host)s.",
+                        dict(host=new_host,
+                             current_host=current_host,
+                             instance=instance_name))
+            new_host = current_host
+
+        host_changed = old_host.upper() != new_host.upper()
+        migrated_here = new_host.upper() == self._this_node.upper()
+        migrated_from_here = old_host.upper() == self._this_node.upper()
+
+        if not host_changed:
+            LOG.warning("The source node is the same as the destination "
+                        "node: %(host)s. The instance %(instance)s may have "
+                        "bounced between hosts due to a failure.",
+                        dict(host=old_host, instance=instance_name))
+
         if instance.task_state == task_states.MIGRATING:
             # In case of live migration triggered by the user, we get the
             # event that the instance changed host but we do not want
@@ -135,11 +164,11 @@
         nw_info = self._network_api.get_instance_nw_info(self._context,
                                                          instance)
 
-        if old_host and old_host.upper() == self._this_node.upper():
-            LOG.debug('Actions at source node.')
+        if host_changed and migrated_from_here:
+            LOG.debug('Cleaning up moved instance: %s.', instance_name)
             self._vmops.unplug_vifs(instance, nw_info)
             return
-        elif new_host.upper() != self._this_node.upper():
+        if not migrated_here:
             LOG.debug('Instance %s did not failover to this node.',
                       instance_name)
             return
@@ -148,10 +177,26 @@
                  {'instance': instance_name})
         self._nova_failover_server(instance, new_host)
-        self._failover_migrate_networks(instance, old_host)
+        if host_changed:
+            self._failover_migrate_networks(instance, old_host)
+
         self._vmops.plug_vifs(instance, nw_info)
         self._serial_console_ops.start_console_handler(instance_name)
 
+    def _wait_for_pending_instance(self, instance_name):
+        # TODO(lpetrut): switch to an event listener. We'd probably want to
+        # avoid having one event listener per failed over instance, as there
+        # can be many of them.
+        group_state = self._clustutils.get_cluster_group_state_info(
+            instance_name)['state']
+        while group_state == os_win_const.CLUSTER_GROUP_PENDING:
+            LOG.debug("Waiting for pending instance cluster group: %s",
+                      instance_name)
+            time.sleep(2)
+
+            group_state = self._clustutils.get_cluster_group_state_info(
+                instance_name)['state']
+
     def _failover_migrate_networks(self, instance, source):
         """This is called after a VM failovered to this node.
 
         This will change the owner of the neutron ports to this node.
diff --git a/compute_hyperv/nova/cluster/driver.py b/compute_hyperv/nova/cluster/driver.py index cbc1a984..d95a8512 100644 --- a/compute_hyperv/nova/cluster/driver.py +++ b/compute_hyperv/nova/cluster/driver.py @@ -21,6 +21,8 @@ from compute_hyperv.nova import driver class HyperVClusterDriver(driver.HyperVDriver): + use_coordination = True + def __init__(self, virtapi): super(HyperVClusterDriver, self).__init__(virtapi) diff --git a/compute_hyperv/nova/driver.py b/compute_hyperv/nova/driver.py index 0e4198f2..65faf8f3 100644 --- a/compute_hyperv/nova/driver.py +++ b/compute_hyperv/nova/driver.py @@ -30,6 +30,7 @@ from os_win import utilsfactory from oslo_log import log as logging import six +from compute_hyperv.nova import coordination from compute_hyperv.nova import eventhandler from compute_hyperv.nova import hostops from compute_hyperv.nova import imagecache @@ -108,6 +109,8 @@ class HyperVDriver(driver.ComputeDriver): "supports_trusted_certs": True, } + use_coordination = False + def __init__(self, virtapi): # check if the current version of Windows is supported before any # further driver initialisation. @@ -151,6 +154,9 @@ class HyperVDriver(driver.ComputeDriver): return False def init_host(self, host): + if self.use_coordination: + coordination.COORDINATOR.start() + self._serialconsoleops.start_console_handlers() self._set_event_handler_callbacks() diff --git a/compute_hyperv/tests/test.py b/compute_hyperv/tests/test.py index b5d43f27..38a9694f 100644 --- a/compute_hyperv/tests/test.py +++ b/compute_hyperv/tests/test.py @@ -85,6 +85,7 @@ class NoDBTestCase(base.BaseTestCase): """ TIMEOUT_SCALING_FACTOR = 1 + MOCK_TOOZ = True def setUp(self): """Run before each test method to initialize test environment.""" @@ -111,6 +112,11 @@ class NoDBTestCase(base.BaseTestCase): self.useFixture(nova_fixtures.PoisonFunctions()) + if self.MOCK_TOOZ: + self.patch('compute_hyperv.nova.coordination.Coordinator.start') + self.patch('compute_hyperv.nova.coordination.Coordinator.stop') + self.patch('compute_hyperv.nova.coordination.Coordinator.get_lock') + def _clear_attrs(self): # Delete attributes that don't start with _ so they don't pin # memory around unnecessarily for the duration of the test @@ -124,6 +130,12 @@ class NoDBTestCase(base.BaseTestCase): for k, v in six.iteritems(kw): CONF.set_override(k, v, group) + def patch(self, path, *args, **kwargs): + patcher = mock.patch(path, *args, **kwargs) + result = patcher.start() + self.addCleanup(patcher.stop) + return result + def assertPublicAPISignatures(self, baseinst, inst): def get_public_apis(inst): methods = {} diff --git a/compute_hyperv/tests/unit/cluster/test_clusterops.py b/compute_hyperv/tests/unit/cluster/test_clusterops.py index 88e2abed..9295d875 100644 --- a/compute_hyperv/tests/unit/cluster/test_clusterops.py +++ b/compute_hyperv/tests/unit/cluster/test_clusterops.py @@ -21,6 +21,7 @@ from nova.compute import vm_states from nova.network.neutronv2 import api as network_api from nova import objects from nova.virt import event as virtevent +from os_win import constants as os_win_const from os_win import exceptions as os_win_exc from compute_hyperv.nova.cluster import clusterops @@ -144,10 +145,12 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase): mock_instance1.name, mock_instance1.host, self.clusterops._this_node) + @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance') @mock.patch.object(clusterops, 'LOG') @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name') def 
test_failover_migrate_no_instance(self, mock_get_instance_by_name, - mock_LOG): + mock_LOG, + mock_wait_pending_instance): mock_get_instance_by_name.return_value = None self.clusterops._failover_migrate(mock.sentinel.instance_name, @@ -159,35 +162,40 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase): self.assertFalse( self.clusterops._network_api.get_instance_nw_info.called) + @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance') @mock.patch.object(clusterops, 'LOG') @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name') def test_failover_migrate_migrating(self, mock_get_instance_by_name, - mock_LOG): + mock_LOG, mock_wait_pending_instance): instance = mock_get_instance_by_name.return_value instance.task_state = task_states.MIGRATING self.clusterops._failover_migrate(mock.sentinel.instance_name, - mock.sentinel.new_host) + 'new_host') mock_LOG.debug.assert_called_once_with( 'Instance %s is live migrating.', mock.sentinel.instance_name) + @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance') @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name') - def test_failover_migrate_at_source_node(self, mock_get_instance_by_name): + def test_failover_migrate_at_source_node(self, mock_get_instance_by_name, + mock_wait_pending_instance): instance = mock_get_instance_by_name.return_value instance.host = 'old_host' self.clusterops._this_node = instance.host self.clusterops._failover_migrate(mock.sentinel.instance_name, - mock.sentinel.new_host) + 'new_host') self.clusterops._vmops.unplug_vifs.assert_called_once_with(instance, self.clusterops._network_api.get_instance_nw_info.return_value) + @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance') @mock.patch.object(clusterops, 'LOG') @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name') def test_failover_migrate_not_this_node(self, mock_get_instance_by_name, - mock_LOG): + mock_LOG, + mock_wait_pending_instance): self.clusterops._this_node = 'new_host' self.clusterops._failover_migrate(mock.sentinel.instance_name, @@ -197,21 +205,28 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase): 'Instance %s did not failover to this node.', mock.sentinel.instance_name) + @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance') @mock.patch.object(clusterops.ClusterOps, '_failover_migrate_networks') @mock.patch.object(clusterops.ClusterOps, '_nova_failover_server') @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name') - def test_failover_migrate_this_node(self, mock_get_instance_by_name, - mock_nova_failover_server, - mock_failover_migrate_networks): + def test_failover_migrate_changed_host(self, mock_get_instance_by_name, + mock_nova_failover_server, + mock_failover_migrate_networks, + mock_wait_pending_instance): instance = mock_get_instance_by_name.return_value old_host = 'old_host' new_host = 'new_host' instance.host = old_host self.clusterops._this_node = new_host + self._clustutils.get_vm_host.return_value = new_host self.clusterops._failover_migrate(mock.sentinel.instance_name, new_host) + mock_wait_pending_instance.assert_called_once_with( + mock.sentinel.instance_name) + self._clustutils.get_vm_host.assert_called_once_with( + mock.sentinel.instance_name) mock_get_instance_by_name.assert_called_once_with( mock.sentinel.instance_name) get_inst_nw_info = self.clusterops._network_api.get_instance_nw_info @@ -225,6 +240,50 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase): c_handler = 
self.clusterops._serial_console_ops.start_console_handler c_handler.assert_called_once_with(mock.sentinel.instance_name) + @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance') + @mock.patch.object(clusterops.ClusterOps, '_failover_migrate_networks') + @mock.patch.object(clusterops.ClusterOps, '_nova_failover_server') + @mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name') + def test_failover_same_node(self, mock_get_instance_by_name, + mock_nova_failover_server, + mock_failover_migrate_networks, + mock_wait_pending_instance): + # In some cases, the instances may bounce between hosts. We're testing + # the case in which the instance is actually returning to the initial + # host during the time in which we're processing events. + instance = mock_get_instance_by_name.return_value + old_host = 'old_host' + new_host = 'new_host' + instance.host = old_host + self.clusterops._this_node = old_host + self._clustutils.get_vm_host.return_value = old_host + + self.clusterops._failover_migrate(mock.sentinel.instance_name, + new_host) + + get_inst_nw_info = self.clusterops._network_api.get_instance_nw_info + get_inst_nw_info.assert_called_once_with(self.clusterops._context, + instance) + mock_nova_failover_server.assert_called_once_with(instance, old_host) + self.clusterops._vmops.unplug_vifs.assert_not_called() + self.clusterops._vmops.plug_vifs.assert_called_once_with( + instance, get_inst_nw_info.return_value) + mock_failover_migrate_networks.assert_not_called() + c_handler = self.clusterops._serial_console_ops.start_console_handler + c_handler.assert_called_once_with(mock.sentinel.instance_name) + + @mock.patch('time.sleep') + def test_wait_for_pending_instance(self, mock_sleep): + self._clustutils.get_cluster_group_state_info.side_effect = [ + dict(state=os_win_const.CLUSTER_GROUP_PENDING), + dict(state=os_win_const.CLUSTER_GROUP_ONLINE)] + + self.clusterops._wait_for_pending_instance(mock.sentinel.instance_name) + + self._clustutils.get_cluster_group_state_info.assert_has_calls( + [mock.call(mock.sentinel.instance_name)] * 2) + mock_sleep.assert_called_once_with(2) + def test_failover_migrate_networks(self): mock_instance = fake_instance.fake_instance_obj(self.context) fake_source = mock.MagicMock() diff --git a/compute_hyperv/tests/unit/test_coordination.py b/compute_hyperv/tests/unit/test_coordination.py index 6dffbd6a..e818ac43 100644 --- a/compute_hyperv/tests/unit/test_coordination.py +++ b/compute_hyperv/tests/unit/test_coordination.py @@ -50,6 +50,8 @@ class MockToozLock(tooz.locking.Lock): @mock.patch('tooz.coordination.get_coordinator') class CoordinatorTestCase(test_base.HyperVBaseTestCase): + MOCK_TOOZ = False + def test_coordinator_start(self, get_coordinator): crd = get_coordinator.return_value diff --git a/releasenotes/notes/cluster-distributed-locks-5f12252af6b3913b.yaml b/releasenotes/notes/cluster-distributed-locks-5f12252af6b3913b.yaml new file mode 100644 index 00000000..758fc8bc --- /dev/null +++ b/releasenotes/notes/cluster-distributed-locks-5f12252af6b3913b.yaml @@ -0,0 +1,11 @@ +--- +upgrade: + - | + When using the cluster driver, a distributed lock backend will have to be + configured. +fixes: + - | + In order to fix race conditions that can occur when handling instance + failovers, the cluster driver is now using distributed locks. A + distributed lock backend (e.g. etcd, mysql, file based, etc) will have to + be configured.
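
A note on the upgrade impact above: the distributed locks are provided
through tooz, so every compute node in the Hyper-V cluster has to point at
the same lock backend via nova-compute's configuration. The exact option
name is not part of this diff; the snippet below assumes the Cinder-style
coordination module exposes it as "backend_url" under a [coordination]
section, and uses placeholder addresses.

    [coordination]
    # All nodes must share this backend, otherwise the failover lock is
    # effectively node-local. A file based backend (e.g.
    # file:///C:/OpenStack/locks) is only suitable for single-node testing.
    backend_url = etcd3+http://192.0.2.10:2379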
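
For reference, the 'failover-{instance_name}' lock added to
_failover_migrate ultimately maps onto a tooz lock. Below is a minimal,
standalone sketch of the underlying tooz primitives, with placeholder
backend URL, member id and lock name; the actual
compute_hyperv.nova.coordination helpers wrap these calls and may differ
in detail.

    # Illustrative only; not the compute_hyperv.nova.coordination API.
    from tooz import coordination

    coordinator = coordination.get_coordinator(
        'etcd3+http://192.0.2.10:2379',  # placeholder backend URL
        b'compute-node-1')               # unique member id per host
    coordinator.start()

    # Every host derives the same lock name for a given instance, so only
    # one of them processes a failover event for it at a time.
    with coordinator.get_lock(b'failover-instance-00000001'):
        pass  # validate the event and claim the instance here

    coordinator.stop()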