Merge "Suppress IPv6 metadata DAD failure and delete address"
This commit is contained in:
commit
3d54423ecf
|
@ -441,6 +441,38 @@ To test the HA of DHCP agent:
|
|||
|
||||
#. Start DHCP agent on HostB. The VM gets the wanted IP again.
|
||||
|
||||
No HA for metadata service on isolated networks
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
All Neutron backends using the DHCP agent can also provide `metadata service
|
||||
<https://docs.openstack.org/nova/latest/user/metadata.html>`_ in isolated
|
||||
networks (i.e. networks without a router). In this case the DHCP agent manages
|
||||
the metadata service (see config option `enable_isolated_metadata
|
||||
<https://docs.openstack.org/neutron/latest/configuration/dhcp-agent.html#DEFAULT.enable_isolated_metadata>`_).
|
||||
|
||||
Note however that the metadata service is only redundant for IPv4, and not
|
||||
IPv6, even when the DHCP service is configured to be highly available
|
||||
(config option `dhcp_agents_per_network
|
||||
<https://docs.openstack.org/neutron/latest/configuration/neutron.html#DEFAULT.dhcp_agents_per_network>`_
|
||||
> 1). This is because the DHCP agent will insert a route to the well known
|
||||
metadata IPv4 address (`169.254.169.254`) via its own IP address, so it will
|
||||
be reachable as long as the DHCP service is available at that IP address.
|
||||
This also means that recovery after a failure is tied to the renewal of the
|
||||
DHCP lease, since that route will only change if the DHCP server for a VM
|
||||
changes.
|
||||
|
||||
With IPv6, the well known metadata IPv6 address (`fe80::a9fe:a9fe`) is used,
|
||||
but directly configured in the DHCP agent network namespace.
|
||||
Due to the enforcement of duplicate address detection (DAD), this address
|
||||
can only be configured in at most one DHCP network namespaces at any time.
|
||||
See `RFC 4862 <https://www.rfc-editor.org/rfc/rfc4862#section-5.4>`_ for
|
||||
details on the DAD process.
|
||||
|
||||
For this reason, even when you have multiple DHCP agents, an arbitrary one
|
||||
(where the metadata IPv6 address is not in `dadfailed` state) will serve all
|
||||
metadata requests over IPv6. When that metadata service instance becomes
|
||||
unreachable there is no failover and the service will become unreachable.
|
||||
|
||||
Disabling and removing an agent
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ from neutron.agent.linux import external_process
|
|||
from neutron.agent.linux import ip_lib
|
||||
from neutron.agent.linux import iptables_manager
|
||||
from neutron.cmd import runtime_checks as checks
|
||||
from neutron.common import _constants as common_constants
|
||||
from neutron.common import utils as common_utils
|
||||
from neutron.ipam import utils as ipam_utils
|
||||
from neutron.privileged.agent.linux import dhcp as priv_dhcp
|
||||
|
@ -1841,7 +1842,7 @@ class DeviceManager(object):
|
|||
if self.conf.force_metadata or self.conf.enable_isolated_metadata:
|
||||
ip_cidrs.append(constants.METADATA_CIDR)
|
||||
if netutils.is_ipv6_enabled():
|
||||
ip_cidrs.append(constants.METADATA_V6_CIDR)
|
||||
ip_cidrs.append(common_constants.METADATA_V6_CIDR)
|
||||
|
||||
self.driver.init_l3(interface_name, ip_cidrs,
|
||||
namespace=network.namespace)
|
||||
|
|
|
@ -102,6 +102,10 @@ class AddressNotReady(exceptions.NeutronException):
|
|||
"become ready: %(reason)s")
|
||||
|
||||
|
||||
class DADFailed(AddressNotReady):
|
||||
pass
|
||||
|
||||
|
||||
InvalidArgument = privileged.InvalidArgument
|
||||
|
||||
|
||||
|
@ -592,7 +596,7 @@ class IpAddrCommand(IpDeviceCommandBase):
|
|||
"""Wait until an address is no longer marked 'tentative' or 'dadfailed'
|
||||
|
||||
raises AddressNotReady if times out, address not present on interface
|
||||
or DAD fails
|
||||
raises DADFailed if Duplicate Address Detection fails
|
||||
"""
|
||||
def is_address_ready():
|
||||
try:
|
||||
|
@ -604,7 +608,7 @@ class IpAddrCommand(IpDeviceCommandBase):
|
|||
# Since both 'dadfailed' and 'tentative' will be set if DAD fails,
|
||||
# check 'dadfailed' first just to be explicit
|
||||
if addr_info['dadfailed']:
|
||||
raise AddressNotReady(
|
||||
raise DADFailed(
|
||||
address=address, reason=_('Duplicate address detected'))
|
||||
if addr_info['tentative']:
|
||||
return False
|
||||
|
|
|
@ -33,6 +33,7 @@ from neutron.agent.l3 import namespaces
|
|||
from neutron.agent.linux import external_process
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.agent.linux import utils as linux_utils
|
||||
from neutron.common import _constants as common_constants
|
||||
from neutron.common import coordination
|
||||
from neutron.common import metadata as comm_meta
|
||||
from neutron.common import utils as common_utils
|
||||
|
@ -241,9 +242,30 @@ class MetadataDriver(object):
|
|||
# HAProxy cannot bind() until IPv6 Duplicate Address Detection
|
||||
# completes. We must wait until the address leaves its 'tentative'
|
||||
# state.
|
||||
ip_lib.IpAddrCommand(
|
||||
parent=ip_lib.IPDevice(name=bind_interface, namespace=ns_name)
|
||||
).wait_until_address_ready(address=bind_address_v6)
|
||||
try:
|
||||
ip_lib.IpAddrCommand(
|
||||
parent=ip_lib.IPDevice(name=bind_interface,
|
||||
namespace=ns_name)
|
||||
).wait_until_address_ready(address=bind_address_v6)
|
||||
except ip_lib.DADFailed as exc:
|
||||
# This failure means that another DHCP agent has already
|
||||
# configured this metadata address, so all requests will
|
||||
# be via that single agent.
|
||||
LOG.info('DAD failed for address %(address)s on interface '
|
||||
'%(interface)s in namespace %(namespace)s on network '
|
||||
'%(network)s, deleting it. Exception: %(exception)s',
|
||||
{'address': bind_address_v6,
|
||||
'interface': bind_interface,
|
||||
'namespace': ns_name,
|
||||
'network': network_id,
|
||||
'exception': str(exc)})
|
||||
try:
|
||||
ip_lib.delete_ip_address(bind_address_v6, bind_interface,
|
||||
namespace=ns_name)
|
||||
except Exception as exc:
|
||||
# do not re-raise a delete failure, just log
|
||||
LOG.info('Address deletion failure: %s', str(exc))
|
||||
return
|
||||
pm.enable()
|
||||
monitor.register(uuid, METADATA_SERVICE_NAME, pm)
|
||||
cls.monitors[router_id] = pm
|
||||
|
@ -338,6 +360,6 @@ def apply_metadata_nat_rules(router, proxy):
|
|||
if netutils.is_ipv6_enabled():
|
||||
for c, r in proxy.metadata_nat_rules(
|
||||
proxy.metadata_port,
|
||||
metadata_address=(constants.METADATA_V6_IP + '/128')):
|
||||
metadata_address=(common_constants.METADATA_V6_CIDR)):
|
||||
router.iptables_manager.ipv6['nat'].add_rule(c, r)
|
||||
router.iptables_manager.apply()
|
||||
|
|
|
@ -86,3 +86,6 @@ TRAIT_NETWORK_TUNNEL = 'CUSTOM_NETWORK_TUNNEL_PROVIDER'
|
|||
|
||||
# The lowest binding index for L3 agents and DHCP agents.
|
||||
LOWEST_AGENT_BINDING_INDEX = 1
|
||||
|
||||
# Neutron-lib defines this with a /64 but it should be /128
|
||||
METADATA_V6_CIDR = constants.METADATA_V6_IP + '/128'
|
||||
|
|
|
@ -33,7 +33,9 @@ AGENTS_SCHEDULER_OPTS = [
|
|||
'network. If this number is greater than 1, the '
|
||||
'scheduler automatically assigns multiple DHCP agents '
|
||||
'for a given tenant network, providing high '
|
||||
'availability for the DHCP service.')),
|
||||
'availability for the DHCP service. However this does '
|
||||
'not provide high availability for the IPv6 metadata '
|
||||
'service in isolated networks.')),
|
||||
cfg.BoolOpt('enable_services_on_agents_with_admin_state_down',
|
||||
default=False,
|
||||
help=_('Enable services on an agent with admin_state_up '
|
||||
|
|
|
@ -37,6 +37,7 @@ from neutron.agent.linux import dhcp
|
|||
from neutron.agent.linux import interface
|
||||
from neutron.agent.linux import utils as linux_utils
|
||||
from neutron.agent.metadata import driver as metadata_driver
|
||||
from neutron.common import _constants as common_constants
|
||||
from neutron.common import config as common_config
|
||||
from neutron.common import utils
|
||||
from neutron.conf.agent import common as config
|
||||
|
@ -1929,7 +1930,7 @@ class TestDeviceManager(base.BaseTestCase):
|
|||
expected_ips = ['172.9.9.9/24', const.METADATA_CIDR]
|
||||
|
||||
if ipv6_enabled:
|
||||
expected_ips.append(const.METADATA_V6_CIDR)
|
||||
expected_ips.append(common_constants.METADATA_V6_CIDR)
|
||||
|
||||
expected = [mock.call.get_device_name(port)]
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ import testtools
|
|||
from neutron.agent.linux import dhcp
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.cmd import runtime_checks as checks
|
||||
from neutron.common import _constants as common_constants
|
||||
from neutron.common import utils as common_utils
|
||||
from neutron.conf.agent import common as config
|
||||
from neutron.conf.agent import dhcp as dhcp_config
|
||||
|
@ -3295,7 +3296,7 @@ class TestDeviceManager(TestConfBase):
|
|||
if enable_isolated_metadata or force_metadata:
|
||||
expect_ips.extend([
|
||||
constants.METADATA_CIDR,
|
||||
constants.METADATA_V6_CIDR])
|
||||
common_constants.METADATA_V6_CIDR])
|
||||
mgr.driver.init_l3.assert_called_with('ns-XXX',
|
||||
expect_ips,
|
||||
namespace='qdhcp-ns')
|
||||
|
|
|
@ -792,7 +792,7 @@ class TestIpAddrCommand(TestIPCmdBase):
|
|||
def test_wait_until_address_dadfailed(self):
|
||||
self.addr_cmd.list = mock.Mock(
|
||||
return_value=[{'tentative': True, 'dadfailed': True}])
|
||||
with testtools.ExpectedException(ip_lib.AddressNotReady):
|
||||
with testtools.ExpectedException(ip_lib.DADFailed):
|
||||
self.addr_cmd.wait_until_address_ready('abcd::1234')
|
||||
|
||||
@mock.patch.object(common_utils, 'wait_until_true')
|
||||
|
|
|
@ -25,6 +25,7 @@ from oslo_utils import uuidutils
|
|||
from neutron.agent.l3 import agent as l3_agent
|
||||
from neutron.agent.l3 import router_info
|
||||
from neutron.agent.linux import external_process as ep
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.agent.linux import iptables_manager
|
||||
from neutron.agent.linux import utils as linux_utils
|
||||
from neutron.agent.metadata import driver as metadata_driver
|
||||
|
@ -76,6 +77,7 @@ class TestMetadataDriverProcess(base.BaseTestCase):
|
|||
EUNAME = 'neutron'
|
||||
EGNAME = 'neutron'
|
||||
METADATA_DEFAULT_IP = '169.254.169.254'
|
||||
METADATA_DEFAULT_IPV6 = 'fe80::a9fe:a9fe'
|
||||
METADATA_PORT = 8080
|
||||
METADATA_SOCKET = '/socket/path'
|
||||
PIDFILE = 'pidfile'
|
||||
|
@ -140,7 +142,7 @@ class TestMetadataDriverProcess(base.BaseTestCase):
|
|||
agent._process_updated_router(router)
|
||||
f.assert_not_called()
|
||||
|
||||
def test_spawn_metadata_proxy(self):
|
||||
def _test_spawn_metadata_proxy(self, dad_failed=False):
|
||||
router_id = _uuid()
|
||||
router_ns = 'qrouter-%s' % router_id
|
||||
service_name = 'haproxy'
|
||||
|
@ -165,22 +167,32 @@ class TestMetadataDriverProcess(base.BaseTestCase):
|
|||
'NamespaceManager.list_all', return_value={}),\
|
||||
mock.patch(
|
||||
'neutron.agent.linux.ip_lib.'
|
||||
'IpAddrCommand.wait_until_address_ready') as mock_wait:
|
||||
'IpAddrCommand.wait_until_address_ready') as mock_wait,\
|
||||
mock.patch(
|
||||
'neutron.agent.linux.ip_lib.'
|
||||
'delete_ip_address') as mock_del:
|
||||
agent = l3_agent.L3NATAgent('localhost')
|
||||
agent.process_monitor = mock.Mock()
|
||||
cfg_file = os.path.join(
|
||||
metadata_driver.HaproxyConfigurator.get_config_path(
|
||||
agent.conf.state_path),
|
||||
"%s.conf" % router_id)
|
||||
mock_open = self.useFixture(
|
||||
lib_fixtures.OpenFixture(cfg_file)).mock_open
|
||||
mock_wait.return_value = True
|
||||
if dad_failed:
|
||||
mock_wait.side_effect = ip_lib.DADFailed(
|
||||
address=self.METADATA_DEFAULT_IP, reason='DAD failed')
|
||||
else:
|
||||
mock_wait.return_value = True
|
||||
agent.metadata_driver.spawn_monitored_metadata_proxy(
|
||||
agent.process_monitor,
|
||||
router_ns,
|
||||
self.METADATA_PORT,
|
||||
agent.conf,
|
||||
bind_address=self.METADATA_DEFAULT_IP,
|
||||
router_id=router_id)
|
||||
router_id=router_id,
|
||||
bind_address_v6=self.METADATA_DEFAULT_IPV6,
|
||||
bind_interface='fake-if')
|
||||
|
||||
netns_execute_args = [
|
||||
service_name,
|
||||
|
@ -188,6 +200,8 @@ class TestMetadataDriverProcess(base.BaseTestCase):
|
|||
|
||||
log_tag = ("haproxy-" + metadata_driver.METADATA_SERVICE_NAME +
|
||||
"-" + router_id)
|
||||
bind_v6_line = 'bind %s:%s interface %s' % (
|
||||
self.METADATA_DEFAULT_IPV6, self.METADATA_PORT, 'fake-if')
|
||||
cfg_contents = metadata_driver._HAPROXY_CONFIG_TEMPLATE % {
|
||||
'user': self.EUNAME,
|
||||
'group': self.EGNAME,
|
||||
|
@ -200,19 +214,35 @@ class TestMetadataDriverProcess(base.BaseTestCase):
|
|||
'pidfile': self.PIDFILE,
|
||||
'log_level': 'debug',
|
||||
'log_tag': log_tag,
|
||||
'bind_v6_line': ''}
|
||||
'bind_v6_line': bind_v6_line}
|
||||
|
||||
mock_open.assert_has_calls([
|
||||
mock.call(cfg_file, 'w'),
|
||||
mock.call().write(cfg_contents)],
|
||||
any_order=True)
|
||||
if dad_failed:
|
||||
agent.process_monitor.register.assert_not_called()
|
||||
mock_del.assert_called_once_with(self.METADATA_DEFAULT_IPV6,
|
||||
'fake-if',
|
||||
namespace=router_ns)
|
||||
else:
|
||||
mock_open.assert_has_calls([
|
||||
mock.call(cfg_file, 'w'),
|
||||
mock.call().write(cfg_contents)], any_order=True)
|
||||
|
||||
env = {ep.PROCESS_TAG: service_name + '-' + router_id}
|
||||
ip_mock.assert_has_calls([
|
||||
mock.call(namespace=router_ns),
|
||||
mock.call().netns.execute(netns_execute_args, addl_env=env,
|
||||
run_as_root=True)
|
||||
])
|
||||
env = {ep.PROCESS_TAG: service_name + '-' + router_id}
|
||||
ip_mock.assert_has_calls([
|
||||
mock.call(namespace=router_ns),
|
||||
mock.call().netns.execute(netns_execute_args, addl_env=env,
|
||||
run_as_root=True)
|
||||
])
|
||||
|
||||
agent.process_monitor.register.assert_called_once_with(
|
||||
router_id, metadata_driver.METADATA_SERVICE_NAME,
|
||||
mock.ANY)
|
||||
mock_del.assert_not_called()
|
||||
|
||||
def test_spawn_metadata_proxy(self):
|
||||
self._test_spawn_metadata_proxy()
|
||||
|
||||
def test_spawn_metadata_proxy_dad_failed(self):
|
||||
self._test_spawn_metadata_proxy(dad_failed=True)
|
||||
|
||||
def test_create_config_file_wrong_user(self):
|
||||
with mock.patch('pwd.getpwnam', side_effect=KeyError):
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
---
|
||||
issues:
|
||||
- |
|
||||
The high availability of metadata service on isolated networks is limited
|
||||
or non-existent. IPv4 metadata is redundant when the DHCP agent managing
|
||||
it is redundant, but recovery is tied to the renewal of the DHCP lease,
|
||||
making most recoveries very slow. IPv6 metadata is not redundant at all
|
||||
as the IPv6 metadata address can only be configured in a single place at
|
||||
a time as it is link-local. Multiple agents trying to configure it will
|
||||
generate an IPv6 duplicate address detection failure.
|
||||
|
||||
Administrators may observe the IPv6 metadata address in "dadfailed" state
|
||||
in the DHCP namespace for this reason, which is only an indication it is
|
||||
not highly available. Until a redesign is made to the isolated metadata
|
||||
service there is not a better deployment option. See `bug 1953165
|
||||
<https://bugs.launchpad.net/neutron/+bug/1953165>`_ for information.
|
Loading…
Reference in New Issue