Recreate ports for failed over instances

OVS ports can become invalid after instances are failed over.
If an instance lands back on its initial host and the old ports were
not cleaned up, it will be left with those stale ports.

This change adds a config option, "recreate_ports_on_failover" (in the
"hyperv" group), which causes those ports to be recreated.
It is enabled by default. Deployers may consider disabling it
if the remaining ports are cleaned up when rebooting nodes or if the
network backend is not affected by this issue.

Closes-Bug: #1841778

Change-Id: I58aba53b7bc0a5e8954cf31adac6e7e106944b7e
This commit is contained in:
Lucian Petrut 2019-08-28 13:07:46 +03:00
parent a721681e61
commit 7dd16950d3
3 changed files with 21 additions and 2 deletions

View File

@ -189,7 +189,10 @@ class ClusterOps(object):
LOG.exception("Could not update failed over instance '%s' "
"allocations.", instance)
if CONF.hyperv.recreate_ports_on_failover:
self._vmops.unplug_vifs(instance, nw_info)
self._vmops.plug_vifs(instance, nw_info)
self._serial_console_ops.start_console_handler(instance_name)
def _wait_for_pending_instance(self, instance_name):

View File

@ -50,6 +50,11 @@ hyperv_opts = [
min=1,
help="The number of hours in which the max_failover_count "
"number of failovers can occur."),
cfg.BoolOpt('recreate_ports_on_failover',
default=True,
help="When enabled, the ports will be recreated for failed "
"over instances. This ensures that we're not left with "
"a stale port."),
cfg.BoolOpt('auto_failback',
default=True,
help="Allow the VM the failback to its original host once it "

View File

@ -250,6 +250,9 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
c_handler = self.clusterops._serial_console_ops.start_console_handler
c_handler.assert_called_once_with(mock.sentinel.instance_name)
@ddt.data({},
{'recreate_ports_on_failover': True})
@ddt.unpack
@mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
@mock.patch.object(clusterops.ClusterOps, '_failover_migrate_networks')
@mock.patch.object(clusterops.ClusterOps, '_nova_failover_server')
@ -257,10 +260,14 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
def test_failover_same_node(self, mock_get_instance_by_name,
mock_nova_failover_server,
mock_failover_migrate_networks,
mock_wait_pending_instance):
mock_wait_pending_instance,
recreate_ports_on_failover=False):
# In some cases, the instances may bounce between hosts. We're testing
# the case in which the instance is actually returning to the initial
# host during the time in which we're processing events.
self.flags(recreate_ports_on_failover=recreate_ports_on_failover,
group='hyperv')
instance = mock_get_instance_by_name.return_value
old_host = 'old_host'
new_host = 'new_host'
@ -275,7 +282,11 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
get_inst_nw_info.assert_called_once_with(self.clusterops._context,
instance)
mock_nova_failover_server.assert_called_once_with(instance, old_host)
self.clusterops._vmops.unplug_vifs.assert_not_called()
if recreate_ports_on_failover:
self.clusterops._vmops.unplug_vifs.assert_called_once_with(
instance, get_inst_nw_info.return_value)
else:
self.clusterops._vmops.unplug_vifs.assert_not_called()
self.clusterops._vmops.plug_vifs.assert_called_once_with(
instance, get_inst_nw_info.return_value)
self._placement.move_compute_node_allocations.assert_not_called()