Retry plugging ports when clustered instances start

After a while, the Failover Cluster stops retrying to bring failed
instances back up. For example, if the CSV (Cluster Shared Volume) is
down for more than a few minutes, the cluster groups will be set to the
"Failed" state, while the VMs won't be registered on any Hyper-V node.

The issue is that we're only handling cluster group owner changes
(moved instances). If the admin fixes the issue and manually brings
the cluster groups back up, the instances are recreated but we aren't
handling this, so ports won't get reconnected.

This change plugs the ports (vifs) again whenever a clustered instance
starts, so that they get reconnected in this situation as well.

Closes-Bug: #1799163

Change-Id: I5caa65d7b7922dc9632b18acedaf1aedeec3fcc3
This commit is contained in:
Lucian Petrut 2018-10-18 17:49:22 +03:00
parent f6c6dbdf9d
commit d3360948f6
4 changed files with 43 additions and 0 deletions

View File

@ -25,6 +25,7 @@ from nova import network
from nova import objects
from nova import utils
from nova.virt import block_device
from nova.virt import event as virtevent
from os_win import exceptions as os_win_exc
from os_win import utilsfactory
from oslo_log import log as logging
@ -230,3 +231,14 @@ class ClusterOps(object):
instance.host = new_host
instance.node = new_host
instance.save(expected_task_state=[None])
def instance_state_change_callback(self, event):
    """Plug an instance's vifs again once it reports a "started" event.

    Events carrying any other transition are ignored.

    :param event: lifecycle event; its ``transition`` and ``name``
                  attributes are read.
    """
    if event.transition != virtevent.EVENT_LIFECYCLE_STARTED:
        return

    # Plugging the vifs may not have been possible when the instance was
    # failed over, e.g. if it ended up in "failed" state without actually
    # being registered in Hyper-V and was only brought back online
    # afterwards. Plug them now that the instance has started.
    instance = self._get_instance_by_name(event.name)
    network_info = self._network_api.get_instance_nw_info(
        self._context, instance)
    self._vmops.plug_vifs(instance, network_info)

View File

@ -31,6 +31,12 @@ class HyperVClusterDriver(driver.HyperVDriver):
self._clops.start_failover_listener_daemon()
self._clops.reclaim_failovered_instances()
def _set_event_handler_callbacks(self):
    """Register the parent driver's callbacks plus the cluster ops one."""
    super(HyperVClusterDriver, self)._set_event_handler_callbacks()

    # Let the cluster ops react to instance state changes as well.
    cluster_callback = self._clops.instance_state_change_callback
    self._event_handler.add_callback(cluster_callback)
def spawn(self, context, instance, image_meta, injected_files,
admin_password, allocations, network_info=None,
block_device_info=None):

View File

@ -20,6 +20,7 @@ from nova.compute import task_states
from nova.compute import vm_states
from nova.network.neutronv2 import api as network_api
from nova import objects
from nova.virt import event as virtevent
from os_win import exceptions as os_win_exc
from compute_hyperv.nova.cluster import clusterops
@ -50,6 +51,7 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
self.clusterops = clusterops.ClusterOps()
self.clusterops._context = self.context
self._clustutils = self.clusterops._clustutils
self._network_api = self.clusterops._network_api
def test_get_instance_host(self):
mock_instance = fake_instance.fake_instance_obj(self.context)
@ -341,3 +343,17 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
self.assertEqual(mock.sentinel.host, mock_instance.host)
self.assertEqual(mock.sentinel.host, mock_instance.node)
mock_instance.save.assert_called_once_with(expected_task_state=[None])
@mock.patch.object(clusterops.ClusterOps, '_get_instance_by_name')
def test_instance_state_change_callback(self, mock_get_instance_by_name):
    # A "started" lifecycle event must lead to the vifs being plugged,
    # using the network info fetched for the instance looked up by name.
    started_event = mock.Mock(transition=virtevent.EVENT_LIFECYCLE_STARTED)
    expected_instance = mock_get_instance_by_name.return_value
    get_nw_info = self._network_api.get_instance_nw_info

    self.clusterops.instance_state_change_callback(started_event)

    mock_get_instance_by_name.assert_called_once_with(started_event.name)
    get_nw_info.assert_called_once_with(self.context, expected_instance)
    self.clusterops._vmops.plug_vifs.assert_called_once_with(
        expected_instance, get_nw_info.return_value)

View File

@ -29,6 +29,7 @@ class HyperVClusterTestCase(test_base.HyperVBaseTestCase):
_autospec_classes = [
driver.clusterops.ClusterOps,
base_driver.eventhandler.InstanceEventHandler,
base_driver.hostops.api.API,
driver.livemigrationops.ClusterLiveMigrationOps,
]
@ -57,6 +58,14 @@ class HyperVClusterTestCase(test_base.HyperVBaseTestCase):
self.assertPublicAPISignatures(nova_base_driver.ComputeDriver,
driver.HyperVClusterDriver)
def test_set_event_handler_callbacks(self):
    # The cluster driver must register the parent driver's callbacks
    # followed by the cluster ops state change callback.
    expected_calls = [
        mock.call(callback) for callback in (
            self.driver.emit_event,
            self.driver._vmops.instance_state_change_callback,
            self.driver._clops.instance_state_change_callback)]

    self.driver._set_event_handler_callbacks()

    add_callback = self.driver._event_handler.add_callback
    add_callback.assert_has_calls(expected_calls)
@mock.patch.object(base_driver.HyperVDriver, 'spawn')
def test_spawn(self, mock_superclass_spawn):
self.driver.spawn(self.context, mock.sentinel.fake_instance,