From 8c44a45f6367932421748b401df2a660ae1c5197 Mon Sep 17 00:00:00 2001 From: Samuel Walladge Date: Wed, 21 Sep 2022 14:36:17 +0930 Subject: [PATCH] Wait for management interface IP to be assigned There can be a delay between the interface being created, and an IP address getting assigned, which previously caused a race condition where the config could be rendered before the IP address was ready resulting in the health manager bind_ip to be empty. This ensures that the IP address will be ready before continuing, which will ensure that the config rendering will not happen until ready, and the configure-resources action will only return once it's all done. Closes-Bug: #1961088 Change-Id: I2cae5f0e307c8cd14f1831f3416d890ad604b705 (cherry picked from commit d8d8963667cb620370e2c424dbe668460ab99ff1) --- src/lib/charm/openstack/api_crud.py | 37 ++++++++ src/lib/charm/openstack/octavia.py | 92 ++++++++++++++++--- .../test_lib_charm_openstack_api_crud.py | 4 + .../test_lib_charm_openstack_octavia.py | 4 - 4 files changed, 118 insertions(+), 19 deletions(-) diff --git a/src/lib/charm/openstack/api_crud.py b/src/lib/charm/openstack/api_crud.py index 25b38e26..13a57de3 100644 --- a/src/lib/charm/openstack/api_crud.py +++ b/src/lib/charm/openstack/api_crud.py @@ -26,6 +26,7 @@ import neutronclient import socket import subprocess import tenacity +import time from keystoneauth1 import identity as keystone_identity from keystoneauth1 import session as keystone_session @@ -507,6 +508,38 @@ def ensure_hm_port_mtu(identity_service): ch_core.hookenv.log('mgmt network not found - cannot set mtu') +def wait_for_address_on_mgmt_interface(): + """Poll for an address on the management interface. + + :returns: True if an address was found before timing out, else False + :rtype: bool + """ + # Sometimes RA packets are not sent in response immediately, + # so we must wait for the next one to be sent in the default interval. + # The default interval is 10 minutes, + # so we must allow for polling at least that long. + # LP: #1965883 + # 90 * 10 seconds = 15 minutes + POLL_TRIES = 90 + POLL_INTERVAL = 10 + + for _ in range(POLL_TRIES): + ch_core.hookenv.log('polling for address on mgmt interface', + level=ch_core.hookenv.DEBUG) + if octavia.get_address_on_mgmt_interface(): + ch_core.hookenv.log( + 'address found on mgmt interface', + level=ch_core.hookenv.INFO + ) + return True + + time.sleep(POLL_INTERVAL) + + ch_core.hookenv.log('timed out waiting for address on mgmt interface', + level=ch_core.hookenv.WARNING) + return False + + def setup_hm_port(identity_service, octavia_charm, host_id=None): """Create a per unit Neutron and OVS port for Octavia Health Manager. @@ -602,6 +635,10 @@ def setup_hm_port(identity_service, octavia_charm, host_id=None): toggle_hm_port(identity_service, octavia_charm.local_unit_name, enabled=True) + + if not wait_for_address_on_mgmt_interface(): + return False + return unit_changed diff --git a/src/lib/charm/openstack/octavia.py b/src/lib/charm/openstack/octavia.py index d8d2c674..fcbbd002 100644 --- a/src/lib/charm/openstack/octavia.py +++ b/src/lib/charm/openstack/octavia.py @@ -63,6 +63,51 @@ charms_openstack.charm.use_defaults('charm.default-select-release', 'config.changed') +def get_address_on_mgmt_interface(): + """ + Check for an address assigned to the managament interface and return it. + + Follow the same logic as used by health_manager_bind_ip(), + since that's the reason we need to check for an address. + + :returns: the address if an address was found, otherwise None + :rtype: Optional[str] + """ + for af in ['AF_INET6', 'AF_INET']: + try: + ips = ch_net_ip.get_iface_addr( + iface=OCTAVIA_MGMT_INTF, inet_type=af + ) + ch_core.hookenv.log( + 'Checking for address on mgmt interface; ' + 'found these IPs on {} ({}): {}'.format( + OCTAVIA_MGMT_INTF, af, ips, + ), + level=ch_core.hookenv.DEBUG, + ) + + ips = [ip for ip in ips if '%' not in ip] + if ips: + ch_core.hookenv.log( + 'Returning address found on mgmt interface: {}'.format( + ips[0], + ), + level=ch_core.hookenv.DEBUG, + ) + return ips[0] + except Exception as e: + # ch_net_ip.get_iface_addr() throws an exception of type + # Exception when the requested interface does not exist or if + # it has no addresses in the requested address family. + ch_core.hookenv.log( + 'Checking for address on mgmt interface failed: {}'.format(e), + level=ch_core.hookenv.DEBUG, + ) + pass + + return None + + @charms_openstack.adapters.config_property def health_manager_hwaddr(cls): """Return hardware address for Health Manager interface. @@ -100,21 +145,18 @@ def health_manager_bind_ip(cls): :returns: IP address of unit local Health Manager interface. :rtype: str """ - ip_list = [] - for af in ['AF_INET6', 'AF_INET']: - try: - ip_list.extend( - (ip for ip in - ch_net_ip.get_iface_addr(iface=OCTAVIA_MGMT_INTF, - inet_type=af) - if '%' not in ip)) - except Exception: - # ch_net_ip.get_iface_addr() throws an exception of type - # Exception when the requested interface does not exist or if - # it has no addresses in the requested address family. - pass - if ip_list: - return ip_list[0] + ip = get_address_on_mgmt_interface() + if ip: + return ip + + # we should only get to here if setup_hm_port has failed + # or never been called. + # because that function should create the interface + # that we're querying above. + ch_core.hookenv.log( + 'health_manager_bind_ip failed to discover any addresses', + level=ch_core.hookenv.WARNING + ) @charms_openstack.adapters.config_property @@ -435,6 +477,26 @@ class BaseOctaviaCharm(ch_plugins.PolicydOverridePlugin, 'examine documentation')] return states_to_check + def custom_assess_status_last_check(self): + """Add extra status checks. + + This is called by the base charm class assess_status handler, + after all other checks. + + This is a good place to put additional information about the running + service, such as cluster status etc. + + Return (None, None) if the status is okay (i.e. the unit is active). + Return ('active', message) do shortcut and force the unit to the active + status. + Return (other_status, message) to set the status to desired state. + + :returns: None, None - no action in this function. + """ + if not get_address_on_mgmt_interface(): + return ('blocked', 'no address on mgmt interface') + return (None, None) + def get_amqp_credentials(self): """Configure the AMQP credentials for Octavia.""" return ('octavia', 'openstack') diff --git a/unit_tests/test_lib_charm_openstack_api_crud.py b/unit_tests/test_lib_charm_openstack_api_crud.py index 857ebe23..17bd5223 100644 --- a/unit_tests/test_lib_charm_openstack_api_crud.py +++ b/unit_tests/test_lib_charm_openstack_api_crud.py @@ -303,6 +303,10 @@ class TestAPICrud(test_utils.PatchHelper): nc.list_networks.return_value = {'networks': [{'id': network_uuid, 'mtu': 9000}]} + self.patch_object(octavia.ch_net_ip, 'get_iface_addr') + self.get_iface_addr.return_value = [ + 'fe80:db8:42%eth0', '2001:db8:42::42', '127.0.0.1' + ] self.patch('subprocess.check_output', 'check_output') self.patch('subprocess.check_call', 'check_call') self.patch_object(api_crud, 'get_hm_port') diff --git a/unit_tests/test_lib_charm_openstack_octavia.py b/unit_tests/test_lib_charm_openstack_octavia.py index 8775bdeb..6430558b 100644 --- a/unit_tests/test_lib_charm_openstack_octavia.py +++ b/unit_tests/test_lib_charm_openstack_octavia.py @@ -50,14 +50,10 @@ class TestOctaviaCharmConfigProperties(Helper): self.assertEqual(octavia.health_manager_bind_ip(cls), data[1]) self.get_iface_addr.assert_any_call(iface=octavia.OCTAVIA_MGMT_INTF, inet_type='AF_INET6') - self.get_iface_addr.assert_any_call(iface=octavia.OCTAVIA_MGMT_INTF, - inet_type='AF_INET') self.get_iface_addr.return_value = [data[2]] self.assertEqual(octavia.health_manager_bind_ip(cls), data[2]) self.get_iface_addr.assert_any_call(iface=octavia.OCTAVIA_MGMT_INTF, inet_type='AF_INET6') - self.get_iface_addr.assert_any_call(iface=octavia.OCTAVIA_MGMT_INTF, - inet_type='AF_INET') def test_heartbeat_key(self): cls = mock.MagicMock()