ovn: Spawn metadata resources on additional chassis event

Since the additional_chassis column [1] is used during live
migration to optimize the VM network timeout, we can also use
it to minimize downtime for the metadata service. This patch adds an
additional match for when a new Port Binding is introduced to the chassis
associated with a node. This triggers the provisioning of metadata
resources much earlier in the process, while the VM is still in the paused
state.

[1]
35fade3b5f

Closes-bug: #2036118
Change-Id: I66fa3c541db54c5f57404681f30b141fe8dad84a
Signed-off-by: Jakub Libosvar <libosvar@redhat.com>
This commit is contained in:
Jakub Libosvar 2023-09-21 21:06:41 +00:00
parent 1daa0dd5bf
commit 3ec7f96cbb
7 changed files with 404 additions and 6 deletions

View File

@ -67,6 +67,15 @@ def _sync_lock(f):
return wrapped
def _match_only_if_additional_chassis_is_supported(f):
    """Guard a match check behind additional_chassis support.

    The returned wrapper asks
    ``ovn_utils.is_additional_chassis_supported`` about the agent's
    southbound IDL. When support is missing the check is answered with
    ``False`` without calling ``f``; otherwise the call is forwarded
    unchanged.

    :param f: match method with the signature ``(self, row, old)``.
    :returns: the guarded method.
    """
    @functools.wraps(f)
    def wrapped(self, row, old):
        supported = ovn_utils.is_additional_chassis_supported(
            self.agent.sb_idl)
        return f(self, row, old) if supported else False

    return wrapped
class ConfigException(Exception):
"""Misconfiguration of the agent
@ -118,6 +127,8 @@ class PortBindingUpdatedEvent(PortBindingEvent):
self._is_localport_ext_ids_update,
self._is_new_chassis_set,
self._is_chassis_removed,
self._additional_chassis_added,
self._additional_chassis_removed,
]
def match_fn(self, event, row, old):
@ -154,6 +165,15 @@ class PortBindingUpdatedEvent(PortBindingEvent):
def _is_new_chassis_set(self, row, old):
self._log_msg = "Port %s in datapath %s bound to our chassis"
try:
if ovn_utils.is_additional_chassis_supported(self.agent.sb_idl):
try:
# If the additional chassis used to be in the old version
# the resources are already provisioned
if self.agent.chassis in {c.name for c in
old.additional_chassis}:
return False
except AttributeError:
pass
return (row.chassis[0].name == self.agent.chassis and
not old.chassis)
except (IndexError, AttributeError):
@ -167,6 +187,56 @@ class PortBindingUpdatedEvent(PortBindingEvent):
except (IndexError, AttributeError):
return False
@_match_only_if_additional_chassis_is_supported
def _additional_chassis_added(self, row, old):
    """Match when the agent chassis was added to additional_chassis.

    Additional chassis of the target node is set during an instance
    live migration. We can provision resources early before the
    instance lands on this chassis. After the VM finishes live
    migration, it already has the resources provisioned therefore we
    do not need to check when the chassis is moved from
    the Additional_Chassis column to the Chassis column.

    :param row: the updated Port_Binding row.
    :param old: object holding the previous values of the columns that
                changed; reading an unchanged column raises
                AttributeError.
    :returns: True if the agent chassis is listed in the row's
              additional_chassis but was not in the old value, False
              otherwise.
    """
    additional_chassis = {ch for ch in row.additional_chassis
                          if ch.name == self.agent.chassis}
    # Use the same ``_log_msg`` attribute that the other match checks of
    # this class assign, so the message is not stored on an unrelated
    # attribute.
    self._log_msg = (
        "Live migrating port %s from network %s was added to this "
        "chassis. Provisioning resources early.")
    try:
        # Return True if the agent chassis was added to additional_chassis
        # column
        return bool(
            additional_chassis.difference(old.additional_chassis))
    except AttributeError:
        # If additional_chassis column was not changed then the old object
        # raises AttributeError when reading the column
        return False
@_match_only_if_additional_chassis_is_supported
def _additional_chassis_removed(self, row, old):
    """Match when the agent chassis left additional_chassis for good.

    Only the case where the agent chassis used to be in the
    additional_chassis column, was removed from it, and was not set in
    the chassis column at the same time is matched. If the agent chassis
    ended up in the chassis column the live migration succeeded and the
    resources must stay in place.

    :param row: the updated Port_Binding row.
    :param old: object holding the previous values of changed columns.
    :returns: True when the agent chassis was dropped from
              additional_chassis without becoming the bound chassis.
    """
    # NOTE(review): unlike the sibling checks, no log message is set
    # here - confirm which message gets logged for this match.
    agent_chassis = self.agent.chassis
    try:
        previously_additional = {
            chassis for chassis in old.additional_chassis
            if chassis.name == agent_chassis}
    except AttributeError:
        # The additional_chassis column was not part of this update, so
        # the old object does not expose it.
        return False

    # A chassis column that changed to the agent chassis means the port
    # landed here; nothing has to be torn down.
    try:
        bound_to_agent = (hasattr(old, 'chassis') and
                          row.chassis[0].name == agent_chassis)
    except IndexError:
        bound_to_agent = False
    if bound_to_agent:
        return False

    # Match only if the agent chassis was in the old additional_chassis
    # value and is no longer in the current one.
    removed = previously_additional.difference(row.additional_chassis)
    return bool(removed)
class PortBindingDeletedEvent(PortBindingEvent):
EVENT = PortBindingEvent.ROW_DELETE
@ -392,7 +462,8 @@ class MetadataAgent(object):
"""Return a set of datapath objects of the VIF ports on the current
chassis.
"""
ports = self.sb_idl.get_ports_on_chassis(self.chassis)
ports = self.sb_idl.get_ports_on_chassis(
self.chassis, include_additional_chassis=True)
return set(p.datapath for p in self._vif_ports(ports))
@_sync_lock
@ -581,7 +652,8 @@ class MetadataAgent(object):
metadata_port_info = MetadataPortInfo(mac, ip_addresses,
metadata_port.logical_port)
chassis_ports = self.sb_idl.get_ports_on_chassis(self.chassis)
chassis_ports = self.sb_idl.get_ports_on_chassis(
self.chassis, include_additional_chassis=True)
datapath_ports_ips = []
for chassis_port in self._vif_ports(chassis_ports):
if str(chassis_port.datapath.uuid) == datapath_uuid:

View File

@ -1122,3 +1122,7 @@ def get_requested_chassis(requested_chassis):
if isinstance(requested_chassis, str):
return requested_chassis.split(',')
return []
def is_additional_chassis_supported(idl):
    """Tell whether the schema has ``Port_Binding.additional_chassis``.

    :param idl: object exposing ``is_col_present(table, column)``.
    :returns: whatever ``idl.is_col_present`` returns for the
              ``additional_chassis`` column of the ``Port_Binding`` table.
    """
    table, column = 'Port_Binding', 'additional_chassis'
    return idl.is_col_present(table, column)

View File

@ -971,8 +971,15 @@ class OvsdbSbOvnIdl(sb_impl_idl.OvnSbApiIdlImpl, Backend):
return self.db_set('Port_Binding', name, 'external_ids',
{'neutron-port-cidrs': cidrs})
def get_ports_on_chassis(self, chassis, include_additional_chassis=False):
    """Return the Port_Binding rows located on the given chassis.

    :param chassis: name of the chassis to look for.
    :param include_additional_chassis: when True and the schema has the
        additional_chassis column, rows that list the chassis in
        additional_chassis are returned as well.
    :returns: list of Port_Binding rows whose first ``chassis`` entry has
        the given name, plus the additional-chassis matches when enabled.
    """
    # TODO(twilson) Some day it would be nice to stop passing names around
    # and just start using chassis objects so db_find_rows could be used
    rows = self.db_list_rows('Port_Binding').execute(check_error=True)
    # Evaluated once: the schema check does not depend on the row.
    match_additional = (include_additional_chassis and
                        utils.is_additional_chassis_supported(self))

    def _is_on_chassis(row):
        if row.chassis and row.chassis[0].name == chassis:
            return True
        return match_additional and any(
            ch.name == chassis for ch in row.additional_chassis)

    return [r for r in rows if _is_on_chassis(r)]

View File

@ -435,8 +435,7 @@ class OVNClient(object):
# If OVN supports multi-chassis port bindings, use it for live
# migration to asynchronously configure destination port while
# VM is migrating
if self._sb_idl.is_col_present('Port_Binding',
'additional_chassis'):
if utils.is_additional_chassis_supported(self._sb_idl):
mdst = port.get(
portbindings.PROFILE, {}).get(
ovn_const.MIGRATING_ATTR)

View File

@ -22,6 +22,7 @@ from oslo_utils import uuidutils
from ovsdbapp.backend.ovs_idl import event
from ovsdbapp.backend.ovs_idl import idlutils
from ovsdbapp.tests.functional.schema.ovn_southbound import event as test_event
import testtools
from neutron.agent.linux import iptables_manager
from neutron.agent.ovn.metadata import agent
@ -33,6 +34,11 @@ from neutron.conf.agent.metadata import config as meta_config
from neutron.conf.agent.ovn.metadata import config as meta_config_ovn
from neutron.tests.common import net_helpers
from neutron.tests.functional import base
from neutron.tests.functional.common import ovn as ovn_common
class NoDatapathProvision(Exception):
    """Raised by the tests when provision_datapath was not called in time."""
class MetadataAgentHealthEvent(event.WaitEvent):
@ -404,3 +410,236 @@ class TestMetadataAgent(base.TestOVNFunctionalBase):
proxy_sb_idl = self.agent._proxy.server._server._application.sb_idl
agent_sb_idl = self.agent.sb_idl
self.assertEqual(agent_sb_idl, proxy_sb_idl)
@ovn_common.skip_if_additional_chassis_not_supported('sb_api')
def test_metadata_provisioned_on_additional_chassis_change(self):
    """Adding the agent chassis as additional chassis provisions metadata.

    The port is bound to a different chassis first; the agent chassis is
    then written to additional_chassis and ``provision_datapath`` is
    expected to be called within the timeout.
    """
    foreign_name = uuidutils.generate_uuid()
    self.add_fake_chassis("other_chassis", name=foreign_name)
    own_chassis = idlutils.row_by_value(
        self.sb_api, 'Chassis', 'name', self.chassis_name)

    lsp_name, _ = self._create_logical_switch_port()
    bind_cmd = self.sb_api.lsp_bind(lsp_name, foreign_name)
    bind_cmd.execute(check_error=True, log_errors=True)
    port_binding = idlutils.row_by_value(
        self.sb_api, 'Port_Binding', 'logical_port', lsp_name)

    with mock.patch.object(
            agent.MetadataAgent, 'provision_datapath') as provision:
        # Put the agent chassis into additional_chassis
        set_cmd = self.sb_api.db_set(
            'Port_Binding', port_binding.uuid,
            additional_chassis=[own_chassis.uuid])
        set_cmd.execute(check_error=True, log_errors=True)

        n_utils.wait_until_true(
            lambda: provision.called,
            timeout=10,
            exception=NoDatapathProvision(
                "Additional chassis didn't trigger Port Binding event"))
@ovn_common.skip_if_additional_chassis_not_supported('sb_api')
def test_metadata_not_provisioned_on_foreign_additional_chassis_change(
        self):
    """A foreign additional chassis must not trigger provisioning.

    The port is bound to the agent chassis and another chassis is then
    set as additional chassis; ``provision_datapath`` is expected to stay
    uncalled, so the wait has to time out.
    """
    foreign_name = uuidutils.generate_uuid()
    self.add_fake_chassis("other_chassis", name=foreign_name)
    own_chassis = idlutils.row_by_value(
        self.sb_api, 'Chassis', 'name', self.chassis_name)
    foreign_chassis = idlutils.row_by_value(
        self.sb_api, 'Chassis', 'name', foreign_name)

    lsp_name, _ = self._create_logical_switch_port()
    bind_cmd = self.sb_api.lsp_bind(lsp_name, own_chassis.name)
    bind_cmd.execute(check_error=True, log_errors=True)
    port_binding = idlutils.row_by_value(
        self.sb_api, 'Port_Binding', 'logical_port', lsp_name)

    with mock.patch.object(
            agent.MetadataAgent, 'provision_datapath') as provision:
        # Set the other chassis as additional chassis. The agent should
        # not see the notification because it has only its own chassis
        # row locally and does not see other chassis.
        set_cmd = self.sb_api.db_set(
            'Port_Binding', port_binding.uuid,
            additional_chassis=[foreign_chassis.uuid])
        set_cmd.execute(check_error=True, log_errors=True)

        with testtools.ExpectedException(NoDatapathProvision):
            n_utils.wait_until_true(
                lambda: provision.called,
                timeout=1,
                exception=NoDatapathProvision(
                    "Provisioning wasn't triggered"))
@ovn_common.skip_if_additional_chassis_not_supported('sb_api')
def test_metadata_teardown_on_additional_chassis_removed(self):
    """Removing the agent from additional_chassis triggers the agent.

    The agent chassis is first added to additional_chassis of a port
    bound elsewhere, then removed again while the chassis column stays
    untouched. Both updates are expected to call ``provision_datapath``.
    """
    # The decorator is a factory and must be called with the name of the
    # attribute holding the southbound API; applied bare it would replace
    # this test with the inner decorator and the body would never run.
    other_chassis_name = uuidutils.generate_uuid()
    self.add_fake_chassis("other_chassis", name=other_chassis_name)
    agent_chassis = idlutils.row_by_value(
        self.sb_api, 'Chassis', 'name', self.chassis_name)

    lswitchport_name, _ = self._create_logical_switch_port()
    self.sb_api.lsp_bind(
        lswitchport_name, other_chassis_name).execute(
            check_error=True, log_errors=True)
    pb = idlutils.row_by_value(
        self.sb_api, 'Port_Binding', 'logical_port', lswitchport_name)

    with mock.patch.object(
            agent.MetadataAgent, 'provision_datapath') as m_provision:
        # Update the additional_chassis
        self.sb_api.db_set(
            'Port_Binding', pb.uuid,
            additional_chassis=[agent_chassis.uuid]).execute(
                check_error=True, log_errors=True)

        n_utils.wait_until_true(
            lambda: m_provision.called,
            timeout=10,
            exception=NoDatapathProvision(
                "Additional chassis didn't trigger Port Binding event"))

        m_provision.reset_mock()

        # Remove the additional_chassis but keep the chassis. This
        # simulates a failed live migration.
        self.sb_api.db_set(
            'Port_Binding', pb.uuid,
            additional_chassis=[]).execute(
                check_error=True, log_errors=True)

        n_utils.wait_until_true(
            lambda: m_provision.called,
            timeout=10,
            exception=NoDatapathProvision(
                "Removing additional chassis did not call teardown"))
@ovn_common.skip_if_additional_chassis_not_supported('sb_api')
def test_metadata_additional_chassis_removed_chassis_set(self):
    """Moving the agent from additional_chassis to chassis is a no-op.

    After the early provisioning triggered by additional_chassis, one
    update clears additional_chassis and sets the agent chassis as the
    bound chassis. ``provision_datapath`` must not be called again, so
    the second wait is expected to time out.
    """
    foreign_name = uuidutils.generate_uuid()
    self.add_fake_chassis("other_chassis", name=foreign_name)
    own_chassis = idlutils.row_by_value(
        self.sb_api, 'Chassis', 'name', self.chassis_name)

    lsp_name, _ = self._create_logical_switch_port()
    bind_cmd = self.sb_api.lsp_bind(lsp_name, foreign_name)
    bind_cmd.execute(check_error=True, log_errors=True)
    port_binding = idlutils.row_by_value(
        self.sb_api, 'Port_Binding', 'logical_port', lsp_name)

    with mock.patch.object(
            agent.MetadataAgent, 'provision_datapath') as provision:
        # Put the agent chassis into additional_chassis
        add_cmd = self.sb_api.db_set(
            'Port_Binding', port_binding.uuid,
            additional_chassis=[own_chassis.uuid])
        add_cmd.execute(check_error=True, log_errors=True)

        n_utils.wait_until_true(
            lambda: provision.called,
            timeout=10,
            exception=NoDatapathProvision(
                "Additional chassis didn't trigger Port Binding event"))

        provision.reset_mock()

        # Clear additional_chassis and bind the port to the agent chassis
        # in the same update.
        move_cmd = self.sb_api.db_set(
            'Port_Binding', port_binding.uuid,
            additional_chassis=[], chassis=own_chassis.uuid)
        move_cmd.execute(check_error=True, log_errors=True)

        with testtools.ExpectedException(NoDatapathProvision):
            n_utils.wait_until_true(
                lambda: provision.called,
                timeout=1,
                exception=NoDatapathProvision(
                    "Removing additional chassis did not call teardown"))
def _test_metadata_additional_chassis_removed(self, new_chassis_uuid):
    """Shared scenario: agent leaves additional_chassis, chassis changes.

    The agent chassis is added to additional_chassis of a port bound to
    another chassis. additional_chassis is then cleared while the chassis
    column is set to ``new_chassis_uuid``. Both updates are expected to
    call ``provision_datapath``.

    :param new_chassis_uuid: value written to the chassis column together
                             with the empty additional_chassis.
    """
    foreign_name = uuidutils.generate_uuid()
    self.add_fake_chassis("other_chassis", name=foreign_name)
    own_chassis = idlutils.row_by_value(
        self.sb_api, 'Chassis', 'name', self.chassis_name)

    lsp_name, _ = self._create_logical_switch_port()
    bind_cmd = self.sb_api.lsp_bind(lsp_name, foreign_name)
    bind_cmd.execute(check_error=True, log_errors=True)
    port_binding = idlutils.row_by_value(
        self.sb_api, 'Port_Binding', 'logical_port', lsp_name)

    with mock.patch.object(
            agent.MetadataAgent, 'provision_datapath') as provision:
        # Put the agent chassis into additional_chassis
        add_cmd = self.sb_api.db_set(
            'Port_Binding', port_binding.uuid,
            additional_chassis=[own_chassis.uuid])
        add_cmd.execute(check_error=True, log_errors=True)

        n_utils.wait_until_true(
            lambda: provision.called,
            timeout=10,
            exception=NoDatapathProvision(
                "Additional chassis didn't trigger Port Binding event"))

        provision.reset_mock()

        # Drop the agent from additional_chassis and set the requested
        # chassis value in the same update.
        change_cmd = self.sb_api.db_set(
            'Port_Binding', port_binding.uuid,
            additional_chassis=[], chassis=new_chassis_uuid)
        change_cmd.execute(check_error=True, log_errors=True)

        n_utils.wait_until_true(
            lambda: provision.called,
            timeout=10,
            exception=NoDatapathProvision(
                "Removing additional chassis did not call teardown"))
@ovn_common.skip_if_additional_chassis_not_supported('sb_api')
def test_metadata_additional_chassis_removed_different_chassis_set(self):
    """Run the removal scenario with the port moved to a third chassis."""
    third_name = uuidutils.generate_uuid()
    self.add_fake_chassis("other_chassis2", name=third_name)
    third_chassis = idlutils.row_by_value(
        self.sb_api, 'Chassis', 'name', third_name)
    new_uuid = third_chassis.uuid
    self._test_metadata_additional_chassis_removed(new_uuid)
@ovn_common.skip_if_additional_chassis_not_supported('sb_api')
def test_metadata_additional_chassis_removed_chassis_unset(self):
    """Run the removal scenario with the chassis column emptied."""
    no_chassis = []
    self._test_metadata_additional_chassis_removed(
        new_chassis_uuid=no_chassis)
@ovn_common.skip_if_additional_chassis_not_supported('sb_api')
def test_metadata_port_binding_column_updated(self):
    """An unrelated Port_Binding column update must not provision.

    The port is bound to the agent chassis and a key is then added to
    external_ids; ``provision_datapath`` is expected to stay uncalled, so
    the wait has to time out.
    """
    own_chassis = idlutils.row_by_value(
        self.sb_api, 'Chassis', 'name', self.chassis_name)

    lsp_name, _ = self._create_logical_switch_port()
    bind_cmd = self.sb_api.lsp_bind(lsp_name, own_chassis.name)
    bind_cmd.execute(check_error=True, log_errors=True)
    port_binding = idlutils.row_by_value(
        self.sb_api, 'Port_Binding', 'logical_port', lsp_name)

    with mock.patch.object(
            agent.MetadataAgent, 'provision_datapath') as provision:
        add_cmd = self.sb_api.db_add(
            'Port_Binding', port_binding.uuid,
            'external_ids', {'foo': 'bar'})
        add_cmd.execute(check_error=True, log_errors=True)

        with testtools.ExpectedException(NoDatapathProvision):
            n_utils.wait_until_true(
                lambda: provision.called,
                timeout=1,
                exception=NoDatapathProvision(
                    "Provisioning wasn't triggered"))

View File

@ -0,0 +1,31 @@
# Copyright 2023 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from neutron.common.ovn import utils
def skip_if_additional_chassis_not_supported(sb_idl_attribute_name):
    """Build a decorator that skips a test without additional_chassis.

    This is a decorator factory: it has to be called with the attribute
    name, e.g. ``@skip_if_additional_chassis_not_supported('sb_api')``.

    :param sb_idl_attribute_name: name of the attribute on the test case
        instance that holds the southbound IDL/API object to probe.
    :returns: a decorator for test methods. The wrapped test raises
        ``self.skipException`` when
        ``utils.is_additional_chassis_supported`` returns a false value
        for that object and runs the original test otherwise.
    """
    def outer(f):
        @functools.wraps(f)
        def inner(self, *args, **kwargs):
            sb_idl = getattr(self, sb_idl_attribute_name)
            if not utils.is_additional_chassis_supported(sb_idl):
                # Adjacent literals are joined verbatim, so only one of
                # them may carry the separating space.
                raise self.skipException(
                    "Used OVN version schema does not have "
                    "additional_chassis column")
            return f(self, *args, **kwargs)
        return inner
    return outer

View File

@ -13,6 +13,7 @@
#
import copy
from unittest import mock
import uuid
from neutron_lib import constants
@ -29,6 +30,7 @@ from neutron.plugins.ml2.drivers.ovn.mech_driver.ovsdb \
import impl_idl_ovn as impl
from neutron.services.portforwarding import constants as pf_const
from neutron.tests.functional import base as n_base
from neutron.tests.functional.common import ovn as ovn_common
from neutron.tests.functional.resources.ovsdb import events
OWNER = ovn_const.OVN_DEVICE_OWNER_EXT_ID_KEY
@ -201,6 +203,50 @@ class TestSbApi(BaseOvnIdlTest):
self.assertEqual([binding],
self.api.get_ports_on_chassis(chassis.name))
def _test_get_ports_on_chassis_with_additional_chassis(
        self, ports, chassis, bindings, expected):
    """Bind two ports and compare the lookup for the first chassis.

    The first port is bound to the first chassis and the second port to
    the second chassis; the first chassis is then set as additional
    chassis of the second binding.

    :param ports: the two logical ports to bind.
    :param chassis: the two chassis rows; the first one is queried.
    :param bindings: Port_Binding rows matching ``ports``.
    :param expected: rows ``get_ports_on_chassis`` has to return when
                     called with ``include_additional_chassis=True``.
    """
    queried = chassis[0]
    self.api.lsp_bind(
        ports[0].name, queried.name).execute(check_error=True)
    self.api.lsp_bind(
        ports[1].name, chassis[1].name).execute(check_error=True)

    set_additional = self.api.db_set(
        'Port_Binding', bindings[1].uuid,
        additional_chassis=[queried.uuid])
    set_additional.execute(check_error=True, log_errors=True)

    found = self.api.get_ports_on_chassis(
        queried.name, include_additional_chassis=True)
    self.assertEqual(expected, found)
@ovn_common.skip_if_additional_chassis_not_supported('api')
def test_get_ports_on_chassis_with_additional_chassis(self):
    """Both bindings are returned when additional chassis are included."""
    chassis_names = [entry['name'] for entry in self.data['chassis'][:2]]
    first_chassis, first_switch = self._add_switch(chassis_names[0])
    first_port, first_binding = self._add_port_to_switch(first_switch)
    second_chassis, second_switch = self._add_switch(chassis_names[1])
    second_port, second_binding = self._add_port_to_switch(second_switch)

    both_bindings = [first_binding, second_binding]
    self._test_get_ports_on_chassis_with_additional_chassis(
        ports=[first_port, second_port],
        chassis=[first_chassis, second_chassis],
        bindings=both_bindings,
        expected=[first_binding, second_binding])
def test_get_ports_on_chassis_with_additional_chassis_not_supported(self):
    """Only the directly bound port is returned without column support.

    ``is_additional_chassis_supported`` is patched to return False, so the
    binding that lists the chassis only as additional chassis has to be
    left out of the result.
    """
    first_chassis, first_switch = self._add_switch(
        self.data['chassis'][0]['name'])
    first_port, first_binding = self._add_port_to_switch(first_switch)
    second_chassis, second_switch = self._add_switch(
        self.data['chassis'][1]['name'])
    second_port, second_binding = self._add_port_to_switch(second_switch)

    support_check = (
        'neutron.common.ovn.utils.is_additional_chassis_supported')
    with mock.patch(support_check, return_value=False):
        self._test_get_ports_on_chassis_with_additional_chassis(
            ports=[first_port, second_port],
            chassis=[first_chassis, second_chassis],
            bindings=[first_binding, second_binding],
            expected=[first_binding])
class TestNbApi(BaseOvnIdlTest):