Merge "Locate RP-tree parent by hypervisor name"

This commit is contained in:
Zuul 2019-12-19 17:01:21 +00:00 committed by Gerrit Code Review
commit 49f905150d
12 changed files with 252 additions and 52 deletions

View File

@ -83,8 +83,7 @@ class PlacementState(object):
rp_inventory_defaults,
driver_uuid_namespace,
agent_type,
agent_host,
agent_host_rp_uuid,
hypervisor_rps,
device_mappings,
supported_vnic_types,
client):
@ -92,8 +91,7 @@ class PlacementState(object):
self._rp_inventory_defaults = rp_inventory_defaults
self._driver_uuid_namespace = driver_uuid_namespace
self._agent_type = agent_type
self._agent_host = agent_host
self._agent_host_rp_uuid = agent_host_rp_uuid
self._hypervisor_rps = hypervisor_rps
self._device_mappings = device_mappings
self._supported_vnic_types = supported_vnic_types
self._client = client
@ -124,44 +122,49 @@ class PlacementState(object):
traits += self._deferred_update_vnic_type_traits()
return traits
def _deferred_create_agent_rp(self):
agent_rp_name = '%s:%s' % (self._agent_host, self._agent_type)
agent_rp_uuid = place_utils.agent_resource_provider_uuid(
self._driver_uuid_namespace, self._agent_host)
agent_rp = DeferredCall(
self._client.ensure_resource_provider,
resource_provider={
'name': agent_rp_name,
'uuid': agent_rp_uuid,
'parent_provider_uuid': self._agent_host_rp_uuid})
return agent_rp
def _deferred_create_agent_rps(self):
    """Build the deferred calls that ensure the agent resource providers.

    One instance of this class stands for one agent, yet that agent may
    handle devices belonging to several hypervisors. Every hypervisor has
    a root resource provider of its own, hence an agent RP is requested
    under each hypervisor RP.

    :returns: list of DeferredCall objects, one per value stored in
        self._hypervisor_rps (i.e. one per device entry).
    """
    deferred_calls = []
    for hv in self._hypervisor_rps.values():
        hv_name = hv['name']
        # Agent RP is named '<hypervisor name>:<agent type>' and its uuid
        # is derived from the driver namespace and the hypervisor name.
        rp_name = '%s:%s' % (hv_name, self._agent_type)
        rp_uuid = place_utils.agent_resource_provider_uuid(
            self._driver_uuid_namespace, hv_name)
        agent_rp = {
            'name': rp_name,
            'uuid': rp_uuid,
            'parent_provider_uuid': hv['uuid'],
        }
        deferred_calls.append(
            DeferredCall(
                self._client.ensure_resource_provider,
                resource_provider=agent_rp))
    return deferred_calls
def _deferred_create_device_rps(self):
    """Build the deferred calls that ensure the device resource providers.

    For every device listed in self._rp_bandwidths a resource provider
    named '<hypervisor name>:<agent type>:<device>' is requested. Each
    device RP is parented under the agent RP of the hypervisor the device
    belongs to, so the tree keeps the shape
    hypervisor RP -> agent RP -> device RP.

    :returns: list of DeferredCall objects, one per device.
    :raises KeyError: if a device of self._rp_bandwidths has no entry in
        self._hypervisor_rps.
    """
    rps = []
    for device in self._rp_bandwidths:
        hypervisor = self._hypervisor_rps[device]
        rp_name = '%s:%s:%s' % (
            hypervisor['name'], self._agent_type, device)
        rp_uuid = place_utils.device_resource_provider_uuid(
            self._driver_uuid_namespace,
            hypervisor['name'],
            device)
        # The parent of a device RP is the agent RP, not the hypervisor
        # root RP. Derive the agent RP uuid from the same inputs that
        # _deferred_create_agent_rps() uses, so both methods agree
        # without having to dig into the deferred agent call objects.
        agent_rp_uuid = place_utils.agent_resource_provider_uuid(
            self._driver_uuid_namespace, hypervisor['name'])
        rps.append(
            DeferredCall(
                self._client.ensure_resource_provider,
                {'name': rp_name,
                 'uuid': rp_uuid,
                 'parent_provider_uuid': agent_rp_uuid}))
    return rps
def deferred_create_resource_providers(self):
agent_rp = self._deferred_create_agent_rp()
# XXX(bence romsics): I don't like digging in the deferred agent
# object, but without proper Promises I don't see a significantly
# nicer solution.
device_rps = self._deferred_create_device_rps(agent_rp=agent_rp.kwargs)
agent_rps = self._deferred_create_agent_rps()
device_rps = self._deferred_create_device_rps()
rps = []
rps.append(agent_rp)
rps.extend(agent_rps)
rps.extend(device_rps)
return rps
@ -179,7 +182,7 @@ class PlacementState(object):
for device in self._rp_bandwidths:
rp_uuid = place_utils.device_resource_provider_uuid(
self._driver_uuid_namespace,
self._agent_host,
self._hypervisor_rps[device]['name'],
device)
traits = []
traits.append(physnet_trait_mappings[device])
@ -198,7 +201,7 @@ class PlacementState(object):
for device, bw_values in self._rp_bandwidths.items():
rp_uuid = place_utils.device_resource_provider_uuid(
self._driver_uuid_namespace,
self._agent_host,
self._hypervisor_rps[device]['name'],
device)
inventories = {}

View File

@ -14,6 +14,7 @@
# under the License.
import os
import socket
from neutron_lib.utils import runtime
from oslo_config import cfg
@ -67,3 +68,27 @@ def load_interface_driver(conf, get_networks_callback=None):
def is_agent_down(heart_beat_time):
return timeutils.is_older_than(heart_beat_time,
cfg.CONF.agent_down_time)
# TODO(bence romsics): rehome this to neutron_lib.placement.utils
def default_rp_hypervisors(hypervisors, device_mappings):
    """Fill config option 'resource_provider_hypervisors' with defaults.

    Default hypervisor names to socket.gethostname(). Since libvirt knows
    itself by the same name, the default is good for libvirt.

    :param hypervisors: Config option 'resource_provider_hypervisors'
        as parsed by oslo.config, that is a dict with keys of physical devices
        and values of hypervisor names.
    :param device_mappings: Device mappings standardized to the list-valued
        format.
    :returns: A dict with one key for each device found in device_mappings;
        the value is the hypervisor name configured for that device, or
        the default when none is configured. Entries of ``hypervisors``
        whose device is not present in device_mappings are left out.
    """
    default_hypervisor = socket.gethostname()
    rv = {}
    # Only the device lists matter here, the physnet keys are not used.
    for devices in device_mappings.values():
        for device in devices:
            rv[device] = hypervisors.get(device, default_hypervisor)
    return rv

View File

@ -58,6 +58,15 @@ sriov_nic_opts = [
"inventories against. An omitted direction means we do "
"not report an inventory for the corresponding "
"class.")),
cfg.DictOpt('resource_provider_hypervisors',
default={},
help=_("Mapping of network devices to hypervisors: "
"<network_device>:<hypervisor>,... "
"hypervisor name is used to locate the parent of the "
"resource provider tree. Only needs to be set in the "
"rare case when the hypervisor name is different from "
"the DEFAULT.host config option value as known by the "
"nova-compute managing that hypervisor.")),
cfg.DictOpt('resource_provider_inventory_defaults',
default={'allocation_ratio': 1.0,
'min_unit': 1,

View File

@ -76,6 +76,15 @@ ovs_opts = [
"placement nor report inventories against. An omitted "
"direction means we do not report an inventory for the "
"corresponding class.")),
cfg.DictOpt('resource_provider_hypervisors',
default={},
help=_("Mapping of bridges to hypervisors: "
"<bridge>:<hypervisor>,... "
"hypervisor name is used to locate the parent of the "
"resource provider tree. Only needs to be set in the "
"rare case when the hypervisor name is different from "
"the DEFAULT.host config option value as known by the "
"nova-compute managing that hypervisor.")),
cfg.DictOpt('resource_provider_inventory_defaults',
default={'allocation_ratio': 1.0,
'min_unit': 1,

View File

@ -34,6 +34,7 @@ from osprofiler import profiler
import six
from neutron._i18n import _
from neutron.agent.common import utils
from neutron.agent.l2 import l2_agent_extensions_manager as ext_manager
from neutron.agent import rpc as agent_rpc
from neutron.agent import securitygroups_rpc as agent_sg_rpc
@ -133,7 +134,8 @@ class SriovNicSwitchRpcCallbacks(sg_rpc.SecurityGroupAgentRpcCallbackMixin):
@profiler.trace_cls("rpc")
class SriovNicSwitchAgent(object):
def __init__(self, physical_devices_mappings, exclude_devices,
polling_interval, rp_bandwidths, rp_inventory_defaults):
polling_interval, rp_bandwidths, rp_inventory_defaults,
rp_hypervisors):
self.polling_interval = polling_interval
self.network_ports = collections.defaultdict(list)
@ -162,6 +164,7 @@ class SriovNicSwitchAgent(object):
n_constants.RP_BANDWIDTHS: rp_bandwidths,
n_constants.RP_INVENTORY_DEFAULTS:
rp_inventory_defaults,
'resource_provider_hypervisors': rp_hypervisors,
'extensions': self.ext_manager.names()}
# TODO(mangelajo): optimize resource_versions (see ovs agent)
@ -514,6 +517,10 @@ class SriovNicAgentConfigParser(object):
cfg.CONF.SRIOV_NIC.resource_provider_bandwidths)
self.rp_inventory_defaults = place_utils.parse_rp_inventory_defaults(
cfg.CONF.SRIOV_NIC.resource_provider_inventory_defaults)
self.rp_hypervisors = utils.default_rp_hypervisors(
cfg.CONF.SRIOV_NIC.resource_provider_hypervisors,
self.device_mappings
)
self._validate()
def _validate(self):
@ -546,6 +553,7 @@ def main():
exclude_devices = config_parser.exclude_devices
rp_bandwidths = config_parser.rp_bandwidths
rp_inventory_defaults = config_parser.rp_inventory_defaults
rp_hypervisors = config_parser.rp_hypervisors
except ValueError:
LOG.exception("Failed on Agent configuration parse. "
@ -560,7 +568,8 @@ def main():
exclude_devices,
polling_interval,
rp_bandwidths,
rp_inventory_defaults)
rp_inventory_defaults,
rp_hypervisors)
except exc.SriovNicError:
LOG.exception("Agent Initialization Failed")
raise SystemExit(1)

View File

@ -222,6 +222,10 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
br_set)
self.rp_inventory_defaults = place_utils.parse_rp_inventory_defaults(
ovs_conf.resource_provider_inventory_defaults)
self.rp_hypervisors = utils.default_rp_hypervisors(
ovs_conf.resource_provider_hypervisors,
{k: [v] for k, v in self.bridge_mappings.items()}
)
self.setup_physical_bridges(self.bridge_mappings)
self.vlan_manager = vlanmanager.LocalVlanManager()
@ -314,6 +318,8 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin,
n_const.RP_BANDWIDTHS: self.rp_bandwidths,
n_const.RP_INVENTORY_DEFAULTS:
self.rp_inventory_defaults,
'resource_provider_hypervisors':
self.rp_hypervisors,
'integration_bridge':
ovs_conf.integration_bridge,
'tunnel_types': self.tunnel_types,

View File

@ -76,6 +76,25 @@ class PlacementReportPlugin(service_base.ServicePluginBase):
uuid_ns = mech_driver.resource_provider_uuid5_namespace
supported_vnic_types = mech_driver.supported_vnic_types
device_mappings = mech_driver.get_standard_device_mappings(agent)
if 'resource_provider_hypervisors' in configurations:
# When the agent has the fix for
# https://bugs.launchpad.net/neutron/+bug/1853840
# it sends us hypervisor names (compute nodes in nova terminology).
hypervisors = configurations['resource_provider_hypervisors']
else:
# For older agents without the fix we have to assume the old
# buggy behavior. There we assumed DEFAULT.host is the same as the
# hypervisor name, which is true in many deployments, but not
# always. (In nova terminology: The compute host's DEFAULT.host is
# not necessarily the same as the compute node name. We may even
# have multiple compute nodes behind a compute host.)
# TODO(bence romsics): This else branch can be removed when we no
# longer want to support pre-Ussuri agents.
hypervisors = {
device: agent['host']
for device
in configurations['resource_provider_bandwidths'].keys()
}
log_msg = (
'Synchronization of resources '
@ -84,8 +103,16 @@ class PlacementReportPlugin(service_base.ServicePluginBase):
'to placement %(result)s.')
try:
agent_host_rp_uuid = self._get_rp_by_name(
name=agent['host'])['uuid']
name2uuid = {}
for name in hypervisors.values():
name2uuid[name] = self._get_rp_by_name(name=name)['uuid']
hypervisor_rps = {}
for device, hypervisor in hypervisors.items():
hypervisor_rps[device] = {
'name': hypervisor,
'uuid': name2uuid[hypervisor],
}
except (IndexError, ks_exc.HttpError, ks_exc.ClientException):
agent_db.resources_synced = False
agent_db.update()
@ -105,8 +132,7 @@ class PlacementReportPlugin(service_base.ServicePluginBase):
'resource_provider_inventory_defaults'],
driver_uuid_namespace=uuid_ns,
agent_type=agent['agent_type'],
agent_host=agent['host'],
agent_host_rp_uuid=agent_host_rp_uuid,
hypervisor_rps=hypervisor_rps,
device_mappings=device_mappings,
supported_vnic_types=supported_vnic_types,
client=self._placement_client)

View File

@ -48,15 +48,23 @@ class PlacementStateTestCase(base.BaseTestCase):
self.client_mock = mock.Mock()
self.driver_uuid_namespace = uuid.UUID(
'00000000-0000-0000-0000-000000000001')
self.agent_host_rp_uuid = uuid.UUID(
'00000000-0000-0000-0000-000000000002')
# uuid below generated by the following command:
# uuid -v5 '00000000-0000-0000-0000-000000000001' 'fakehost'
self.hypervisor1_rp_uuid = uuid.UUID(
'c0b4abe5-516f-54b8-b965-ff94060dcbcc')
# uuid below generated by the following command:
# uuid -v5 '00000000-0000-0000-0000-000000000001' 'fakehost2'
self.hypervisor2_rp_uuid = uuid.UUID(
'544155b7-1295-5f10-b5f0-eadc50abc6d4')
self.kwargs = {
'rp_bandwidths': {},
'rp_inventory_defaults': {},
'driver_uuid_namespace': self.driver_uuid_namespace,
'agent_type': 'fake agent type',
'agent_host': 'fakehost',
'agent_host_rp_uuid': self.agent_host_rp_uuid,
'hypervisor_rps': {
'eth0': {'name': 'fakehost', 'uuid': self.hypervisor1_rp_uuid},
'eth1': {'name': 'fakehost', 'uuid': self.hypervisor1_rp_uuid},
},
'device_mappings': {},
'supported_vnic_types': [],
'client': self.client_mock,
@ -92,11 +100,11 @@ class PlacementStateTestCase(base.BaseTestCase):
self.client_mock.update_trait.assert_any_call(
name='CUSTOM_VNIC_TYPE_DIRECT')
def test__deferred_create_agent_rp(self):
def test__deferred_create_agent_rps(self):
state = placement_report.PlacementState(**self.kwargs)
deferred = state._deferred_create_agent_rp()
deferred.execute()
for deferred in state._deferred_create_agent_rps():
deferred.execute()
self.client_mock.ensure_resource_provider.assert_called_with(
resource_provider={
@ -104,7 +112,37 @@ class PlacementStateTestCase(base.BaseTestCase):
# uuid below generated by the following command:
# uuid -v5 '00000000-0000-0000-0000-000000000001' 'fakehost'
'uuid': uuid.UUID('c0b4abe5-516f-54b8-b965-ff94060dcbcc'),
'parent_provider_uuid': self.agent_host_rp_uuid})
'parent_provider_uuid': self.hypervisor1_rp_uuid})
def test__deferred_create_agent_rps_multiple_hypervisors(self):
# The shared fixture maps both eth0 and eth1 to 'fakehost'. Remap eth1 to
# a second hypervisor so that the agent has devices on two hypervisors.
self.kwargs['hypervisor_rps']['eth1'] = {
'name': 'fakehost2',
'uuid': self.hypervisor2_rp_uuid,
}
state = placement_report.PlacementState(**self.kwargs)
for deferred in state._deferred_create_agent_rps():
deferred.execute()
# Expect one agent RP per hypervisor, each parented under its own
# hypervisor RP. The order of the calls is not significant.
self.client_mock.ensure_resource_provider.assert_has_calls(
any_order=True,
calls=[
mock.call(resource_provider={
'name': 'fakehost:fake agent type',
# uuid below generated by the following command:
# uuid -v5 '00000000-0000-0000-0000-000000000001' \
# 'fakehost'
'uuid': uuid.UUID('c0b4abe5-516f-54b8-b965-ff94060dcbcc'),
'parent_provider_uuid': self.hypervisor1_rp_uuid}),
mock.call(resource_provider={
'name': 'fakehost2:fake agent type',
# uuid below generated by the following command:
# uuid -v5 '00000000-0000-0000-0000-000000000001' \
# 'fakehost2'
'uuid': uuid.UUID('544155b7-1295-5f10-b5f0-eadc50abc6d4'),
'parent_provider_uuid': self.hypervisor2_rp_uuid}),
]
)
def test_deferred_create_resource_providers(self):
self.kwargs.update({

View File

@ -13,6 +13,8 @@
# License for the specific language governing permissions and limitations
# under the License.
import socket
import mock
from neutron.agent.common import utils
@ -75,3 +77,26 @@ class TestLoadInterfaceDriver(base.BaseTestCase):
self.conf.set_override('interface_driver', 'openvswitchXX')
with testlib_api.ExpectedException(SystemExit):
utils.load_interface_driver(self.conf)
# TODO(bence romsics): rehome this to neutron_lib
class TestDefaultRpHypervisors(base.BaseTestCase):
    """Checks how default_rp_hypervisors() fills in hypervisor names."""

    def test_defaults(self):
        local_hostname = socket.gethostname()

        # Nothing configured: every device gets this host's name.
        all_defaulted = utils.default_rp_hypervisors(
            hypervisors={},
            device_mappings={'physnet0': ['eth0', 'eth1']},
        )
        self.assertEqual(
            {'eth0': local_hostname, 'eth1': local_hostname},
            all_defaulted)

        # A configured device keeps its value, the other one is defaulted.
        partly_configured = utils.default_rp_hypervisors(
            hypervisors={'eth0': 'thathost'},
            device_mappings={'physnet0': ['eth0', 'eth1']},
        )
        self.assertEqual(
            {'eth0': 'thathost', 'eth1': local_hostname},
            partly_configured)

View File

@ -54,7 +54,7 @@ class TestSriovAgent(base.BaseTestCase):
'FixedIntervalLoopingCall',
new=MockFixedIntervalLoopingCall)
self.agent = sriov_nic_agent.SriovNicSwitchAgent({}, {}, 0, {}, {})
self.agent = sriov_nic_agent.SriovNicSwitchAgent({}, {}, 0, {}, {}, {})
@mock.patch("neutron.plugins.ml2.drivers.mech_sriov.agent.eswitch_manager"
".ESwitchManager.get_assigned_devices_info", return_value=set())
@ -82,7 +82,7 @@ class TestSriovAgent(base.BaseTestCase):
"eswitch_manager.PciOsWrapper.is_assigned_vf",
return_value=True)
def test_treat_devices_removed_with_existed_device(self, *args):
agent = sriov_nic_agent.SriovNicSwitchAgent({}, {}, 0, {}, {})
agent = sriov_nic_agent.SriovNicSwitchAgent({}, {}, 0, {}, {}, {})
devices = [(DEVICE_MAC, PCI_SLOT)]
with mock.patch.object(agent.plugin_rpc,
"update_device_down") as fn_udd:
@ -99,7 +99,7 @@ class TestSriovAgent(base.BaseTestCase):
"eswitch_manager.PciOsWrapper.is_assigned_vf",
return_value=True)
def test_treat_devices_removed_with_not_existed_device(self, *args):
agent = sriov_nic_agent.SriovNicSwitchAgent({}, {}, 0, {}, {})
agent = sriov_nic_agent.SriovNicSwitchAgent({}, {}, 0, {}, {}, {})
devices = [(DEVICE_MAC, PCI_SLOT)]
with mock.patch.object(agent.plugin_rpc,
"update_device_down") as fn_udd:
@ -119,7 +119,7 @@ class TestSriovAgent(base.BaseTestCase):
"eswitch_manager.PciOsWrapper.is_assigned_vf",
return_value=True)
def test_treat_devices_removed_failed(self, *args):
agent = sriov_nic_agent.SriovNicSwitchAgent({}, {}, 0, {}, {})
agent = sriov_nic_agent.SriovNicSwitchAgent({}, {}, 0, {}, {}, {})
devices = [(DEVICE_MAC, PCI_SLOT)]
with mock.patch.object(agent.plugin_rpc,
"update_device_down") as fn_udd:
@ -546,7 +546,7 @@ class TestSriovAgent(base.BaseTestCase):
def test_configurations_has_rp_bandwidth(self):
rp_bandwidth = {'ens7': {'egress': 10000, 'ingress': 10000}}
agent = sriov_nic_agent.SriovNicSwitchAgent(
{}, {}, 0, rp_bandwidth, {})
{}, {}, 0, rp_bandwidth, {}, {})
self.assertIn(constants.RP_BANDWIDTHS,
agent.agent_state['configurations'])
@ -562,7 +562,7 @@ class TestSriovAgent(base.BaseTestCase):
'reserved': 0
}
agent = sriov_nic_agent.SriovNicSwitchAgent(
{}, {}, 0, {}, rp_inventory_values)
{}, {}, 0, {}, rp_inventory_values, {})
self.assertIn(constants.RP_INVENTORY_DEFAULTS,
agent.agent_state['configurations'])
@ -706,7 +706,7 @@ class TestSRIOVAgentExtensionConfig(base.BaseTestCase):
def test_report_loaded_extension(self, *args):
with mock.patch.object(agent_rpc.PluginReportStateAPI,
'report_state') as mock_report_state:
agent = sriov_nic_agent.SriovNicSwitchAgent({}, {}, 0, {}, {})
agent = sriov_nic_agent.SriovNicSwitchAgent({}, {}, 0, {}, {}, {})
agent._report_state()
mock_report_state.assert_called_with(
agent.context, agent.agent_state)

View File

@ -53,7 +53,9 @@ class PlacementReportPluginTestCases(test_plugin.Ml2PluginV2TestCase):
# looking all good
agent = {
'agent_type': 'test_mechanism_driver_agent',
'configurations': {'resource_provider_bandwidths': {}},
'configurations': {
'resource_provider_bandwidths': {'some iface': ''},
},
'host': 'fake host',
}
agent_db = mock.Mock()
@ -76,7 +78,8 @@ class PlacementReportPluginTestCases(test_plugin.Ml2PluginV2TestCase):
# looking all good
agent = {
'agent_type': 'test_mechanism_driver_agent',
'configurations': {'resource_provider_bandwidths': {}},
'configurations': {
'resource_provider_bandwidths': {'some iface': ''}},
'host': 'fake host',
}
agent_db = mock.Mock()
@ -201,7 +204,7 @@ class PlacementReportPluginTestCases(test_plugin.Ml2PluginV2TestCase):
self.assertEqual(1, mock_get_agent.call_count)
self.assertEqual(1, mock_sync.call_count)
def test__sync_placement_state(self):
def test__sync_placement_state_legacy(self):
agent = {
'agent_type': 'test_mechanism_driver_agent',
'configurations': {
@ -222,6 +225,30 @@ class PlacementReportPluginTestCases(test_plugin.Ml2PluginV2TestCase):
self.assertEqual(1, mock_queue_event.call_count)
def test__sync_placement_state_rp_hypervisors(self):
# The agent reports 'resource_provider_hypervisors', and the hypervisor
# name in it ('hypervisor0') differs from the agent's 'host'.
agent = {
'agent_type': 'test_mechanism_driver_agent',
'configurations': {
'resource_provider_bandwidths': {},
'resource_provider_inventory_defaults': {},
'resource_provider_hypervisors': {'eth0': 'hypervisor0'},
},
'host': 'fake host',
}
agent_db = mock.Mock()
with mock.patch.object(self.service_plugin._batch_notifier,
'queue_event') as mock_queue_event, \
mock.patch.object(self.service_plugin._placement_client,
'list_resource_providers',
return_value={'resource_providers': [
{'uuid': 'fake uuid'}]}) as mock_list_rps:
self.service_plugin._sync_placement_state(agent, agent_db)
# One sync event is queued, and the resource provider is looked up by the
# reported hypervisor name rather than by agent['host'].
self.assertEqual(1, mock_queue_event.call_count)
mock_list_rps.assert_called_once_with(name='hypervisor0')
class PlacementReporterAgentsTestCases(test_plugin.Ml2PluginV2TestCase):

View File

@ -0,0 +1,23 @@
---
fixes:
- |
Neutron now locates the root resource provider of the resource provider
tree it creates by using the hypervisor name instead of the hostname.
These are different in rare cases only. The hypervisor name can be set
per physical network device in config option
``resource_provider_hypervisors`` which is located in the ``[ovs]``
ini-section for ``ovs-agent`` and ``[sriov_nic]`` ini-section for
``sriov-agent``. Hypervisor names default to ``socket.gethostname()``
which works out of the box with ``libvirt`` even when the ``DEFAULT.host``
config option is set to a non-default value. We believe this change fixes
`bug 1853840 <https://launchpad.net/bugs/1853840>`_.
upgrade:
- |
For users affected by `bug 1853840
<https://launchpad.net/bugs/1853840>`_ the hypervisor name
now can be set per physical network device in config option
``resource_provider_hypervisors`` which is located in the ``[ovs]``
ini-section for ``ovs-agent`` and ``[sriov_nic]`` ini-section for
``sriov-agent``. Hypervisor names default to ``socket.gethostname()``
which works out of the box with ``libvirt`` even when the
``DEFAULT.host`` config option is set to a non-default value.