Recreate ports for failed over instances

OVS ports can become invalid after instances are failed over. If an instance lands back on its initial host and the old ports have not been cleaned up, it will be left with those stale ports.

This change adds a config option, "recreate_ports_on_failover", which allows those ports to be recreated: the VIFs are unplugged and then plugged again while the failover is handled. The option is enabled by default. Deployers may consider disabling it if the leftover ports are cleaned up when nodes are rebooted, or if the network backend is not affected by this issue.

Closes-Bug: #1841778
Change-Id: I58aba53b7bc0a5e8954cf31adac6e7e106944b7e
2019-08-28 13:07:46 +03:00 · 2019-08-28 13:07:46 +03:00 · 7dd16950d3
parent a721681e61
commit 7dd16950d3
3 changed files with 21 additions and 2 deletions
--- a/compute_hyperv/nova/cluster/clusterops.py
+++ b/compute_hyperv/nova/cluster/clusterops.py
@ -189,7 +189,10 @@ class ClusterOps(object):
                LOG.exception("Could not update failed over instance '%s' "
                              "allocations.", instance)

+        if CONF.hyperv.recreate_ports_on_failover:
+            self._vmops.unplug_vifs(instance, nw_info)
        self._vmops.plug_vifs(instance, nw_info)
+
        self._serial_console_ops.start_console_handler(instance_name)

    def _wait_for_pending_instance(self, instance_name):
--- a/compute_hyperv/nova/conf.py
+++ b/compute_hyperv/nova/conf.py
@ -50,6 +50,11 @@ hyperv_opts = [
               min=1,
               help="The number of hours in which the max_failover_count "
                    "number of failovers can occur."),
+    cfg.BoolOpt('recreate_ports_on_failover',
+                default=True,
+                help="When enabled, the ports will be recreated for failed "
+                     "over instances. This ensures that we're not left with "
+                     "a stale port."),
    cfg.BoolOpt('auto_failback',
                default=True,
                help="Allow the VM the failback to its original host once it "
--- a/compute_hyperv/tests/unit/cluster/test_clusterops.py
+++ b/compute_hyperv/tests/unit/cluster/test_clusterops.py
@ -250,6 +250,9 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
        c_handler = self.clusterops._serial_console_ops.start_console_handler
        c_handler.assert_called_once_with(mock.sentinel.instance_name)

+    @ddt.data({},
+              {'recreate_ports_on_failover': True})
+    @ddt.unpack
    @mock.patch.object(clusterops.ClusterOps, '_wait_for_pending_instance')
    @mock.patch.object(clusterops.ClusterOps, '_failover_migrate_networks')
    @mock.patch.object(clusterops.ClusterOps, '_nova_failover_server')
@ -257,10 +260,14 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
    def test_failover_same_node(self, mock_get_instance_by_name,
                                mock_nova_failover_server,
                                mock_failover_migrate_networks,
-                                mock_wait_pending_instance):
+                                mock_wait_pending_instance,
+                                recreate_ports_on_failover=False):
        # In some cases, the instances may bounce between hosts. We're testing
        # the case in which the instance is actually returning to the initial
        # host during the time in which we're processing events.
+        self.flags(recreate_ports_on_failover=recreate_ports_on_failover,
+                   group='hyperv')
+
        instance = mock_get_instance_by_name.return_value
        old_host = 'old_host'
        new_host = 'new_host'
@ -275,7 +282,11 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
        get_inst_nw_info.assert_called_once_with(self.clusterops._context,
                                                 instance)
        mock_nova_failover_server.assert_called_once_with(instance, old_host)
-        self.clusterops._vmops.unplug_vifs.assert_not_called()
+        if recreate_ports_on_failover:
+            self.clusterops._vmops.unplug_vifs.assert_called_once_with(
+                instance, get_inst_nw_info.return_value)
+        else:
+            self.clusterops._vmops.unplug_vifs.assert_not_called()
        self.clusterops._vmops.plug_vifs.assert_called_once_with(
            instance, get_inst_nw_info.return_value)
        self._placement.move_compute_node_allocations.assert_not_called()