diff --git a/neutron/services/ovn_l3/plugin.py b/neutron/services/ovn_l3/plugin.py index 99c15b431c9..8220d37b8b7 100644 --- a/neutron/services/ovn_l3/plugin.py +++ b/neutron/services/ovn_l3/plugin.py @@ -414,18 +414,44 @@ class OVNL3RouterPlugin(service_base.ServicePluginBase, unhosted_gateways = self._nb_ovn.get_unhosted_gateways( port_physnet_dict, chassis_with_physnets, all_gw_chassis, chassis_with_azs) - for g_name in unhosted_gateways: + + self._reschedule_lrps(unhosted_gateways) + + def _reschedule_lrps(self, lrps): + # GW ports and their physnets. + port_physnet_dict = self._get_gateway_port_physnet_mapping() + # All chassis with physnets configured. + chassis_with_physnets = self._sb_ovn.get_chassis_and_physnets() + # All chassis with enable_as_gw_chassis set + all_gw_chassis = self._sb_ovn.get_gateway_chassis_from_cms_options() + chassis_with_azs = self._sb_ovn.get_chassis_and_azs() + + for g_name in lrps: physnet = port_physnet_dict.get(g_name[len(ovn_const.LRP_PREFIX):]) # Remove any invalid gateway chassis from the list, otherwise # we can have a situation where all existing_chassis are invalid existing_chassis = self._nb_ovn.get_gateway_chassis_binding(g_name) primary = existing_chassis[0] if existing_chassis else None az_hints = self._nb_ovn.get_gateway_chassis_az_hints(g_name) - existing_chassis = self.scheduler.filter_existing_chassis( - nb_idl=self._nb_ovn, gw_chassis=all_gw_chassis, - physnet=physnet, chassis_physnets=chassis_with_physnets, - existing_chassis=existing_chassis, az_hints=az_hints, - chassis_with_azs=chassis_with_azs) + filtered_existing_chassis = \ + self.scheduler.filter_existing_chassis( + nb_idl=self._nb_ovn, gw_chassis=all_gw_chassis, + physnet=physnet, + chassis_physnets=chassis_with_physnets, + existing_chassis=existing_chassis, az_hints=az_hints, + chassis_with_azs=chassis_with_azs) + if existing_chassis != filtered_existing_chassis: + first_diff = None + for i in range(len(filtered_existing_chassis)): + if existing_chassis[i] 
!= filtered_existing_chassis[i]: + first_diff = i + break + if first_diff is not None: + LOG.debug( + "A chassis for this gateway has been filtered. " + "Rebalancing priorities %s and lower", first_diff) + filtered_existing_chassis = filtered_existing_chassis[ + :max(first_diff, 1)] candidates = self._ovn_client.get_candidates_for_scheduling( physnet, cms=all_gw_chassis, @@ -433,7 +459,7 @@ class OVNL3RouterPlugin(service_base.ServicePluginBase, availability_zone_hints=az_hints) chassis = self.scheduler.select( self._nb_ovn, g_name, candidates=candidates, - existing_chassis=existing_chassis) + existing_chassis=filtered_existing_chassis) if primary and primary != chassis[0]: if primary not in chassis: LOG.debug("Primary gateway chassis %(old)s " diff --git a/neutron/tests/unit/services/ovn_l3/test_plugin.py b/neutron/tests/unit/services/ovn_l3/test_plugin.py index 6827c54c110..ccf24f7d7ad 100644 --- a/neutron/tests/unit/services/ovn_l3/test_plugin.py +++ b/neutron/tests/unit/services/ovn_l3/test_plugin.py @@ -1651,6 +1651,69 @@ class TestOVNL3RouterPlugin(test_mech_driver.Ml2PluginV2TestCase): self.nb_idl().get_unhosted_gateways.assert_called_once_with( {'foo-1': 'physnet1'}, mock.ANY, mock.ANY, mock.ANY) + @mock.patch('neutron.plugins.ml2.drivers.ovn.mech_driver.mech_driver.' + 'OVNMechanismDriver.list_availability_zones', lambda *_: []) + @mock.patch('neutron.services.ovn_l3.plugin.OVNL3RouterPlugin.' 
+ '_get_gateway_port_physnet_mapping') + def test_schedule_unhosted_gateways_rebalances_lower_prios(self, get_gppm): + unhosted_gws = ['lrp-foo-1', 'lrp-foo-2', 'lrp-foo-3'] + get_gppm.return_value = {k[len(ovn_const.LRP_PREFIX):]: 'physnet1' + for k in unhosted_gws} + # we skip chassis2 here since we assume it has been removed + chassis_mappings = { + 'chassis1': ['physnet1'], + 'chassis3': ['physnet1'], + 'chassis4': ['physnet1'], + } + chassis = ['chassis1', 'chassis3', 'chassis4'] + self.sb_idl().get_chassis_and_physnets.return_value = ( + chassis_mappings) + self.sb_idl().get_gateway_chassis_from_cms_options.return_value = ( + chassis) + self.nb_idl().get_unhosted_gateways.return_value = unhosted_gws + self.mock_candidates.return_value = chassis + # all ports have 4 chassis (including chassis2 that will be removed) + # the ports are not perfectly balanced (but this is realistic with a + # few router creations and deletions) + existing_port_bindings = [ + ['chassis1', 'chassis2', 'chassis3', 'chassis4'], + ['chassis2', 'chassis4', 'chassis3', 'chassis1'], + ['chassis4', 'chassis3', 'chassis1', 'chassis2']] + self.nb_idl().get_gateway_chassis_binding.side_effect = ( + existing_port_bindings) + # for 1. port reschedule all besides the first + # for 2. port reschedule all besides the new first (chassis 4) + # for 3. 
port keep all and drop the last + self.mock_schedule.side_effect = [ + ['chassis1', 'chassis4', 'chassis3'], + ['chassis4', 'chassis3', 'chassis1'], + ['chassis4', 'chassis3', 'chassis1']] + + self.l3_inst.schedule_unhosted_gateways() + + self.mock_candidates.assert_has_calls([ + mock.call(mock.ANY, + chassis_physnets=chassis_mappings, + cms=chassis, availability_zone_hints=[])] * 3) + self.mock_schedule.assert_has_calls([ + mock.call(self.nb_idl(), 'lrp-foo-1', + ['chassis1', 'chassis3', 'chassis4'], + ['chassis1']), + mock.call(self.nb_idl(), 'lrp-foo-2', + ['chassis1', 'chassis3', 'chassis4'], + ['chassis4']), + mock.call(self.nb_idl(), 'lrp-foo-3', + ['chassis1', 'chassis3', 'chassis4'], + ['chassis4', 'chassis3', 'chassis1'])]) + # make sure that the primary chassis stays untouched + self.nb_idl().update_lrouter_port.assert_has_calls([ + mock.call('lrp-foo-1', + gateway_chassis=['chassis1', 'chassis4', 'chassis3']), + mock.call('lrp-foo-2', + gateway_chassis=['chassis4', 'chassis3', 'chassis1']), + mock.call('lrp-foo-3', + gateway_chassis=['chassis4', 'chassis3', 'chassis1'])]) + @mock.patch('neutron.plugins.ml2.plugin.Ml2Plugin.get_network') @mock.patch('neutron.plugins.ml2.plugin.Ml2Plugin.get_networks') @mock.patch('neutron.plugins.ml2.drivers.ovn.mech_driver.ovsdb.' diff --git a/releasenotes/notes/ovn_l3_reschedules_lower_lrps-5b492131dab9040b.yaml b/releasenotes/notes/ovn_l3_reschedules_lower_lrps-5b492131dab9040b.yaml new file mode 100644 index 00000000000..ba1bb6ad7d9 --- /dev/null +++ b/releasenotes/notes/ovn_l3_reschedules_lower_lrps-5b492131dab9040b.yaml @@ -0,0 +1,6 @@ +--- +other: + - | + The OVN L3 scheduler will now update lower priorities of existing LRPs in + case of a chassis change. This can create increased load on OVN during + chassis shutdown, but improves the load distribution of LRPs.