From fe65e12b317e958204c35eda109675efc5c7aaa8 Mon Sep 17 00:00:00 2001 From: Alex Kavanagh Date: Tue, 16 Jul 2019 14:27:30 +0100 Subject: [PATCH] Add caching for knownhost private-address lookups This change adds caching for the host look ups associated with a private-address of a unit. This cache is maintained across hook invocations, and is designed to reduce the time spent in cloud-compute-relation-changed hooks (which occur as nova-compute units join and update on the cloud-compute relation). The feature has been added under an EXPERIMENTAL config flag (with the default being "don't use the cached values") in case there are any corner cases around DNS resolution in the deploying cloud during deployment. An action is included to allow clearing of the cache at unit, application and whole relation level. This clears the cache and re-triggers the host resolution, and relation updates. This is in case of either 1) DNS changed during the deployment, 2) DNS has been altered during the running of the cloud. Change-Id: I5a68bf4c30bf1591184d660d50559c969822ddcf --- actions.yaml | 48 ++++++++++++++-- actions/actions.py | 72 +++++++++++++++++++++++- actions/clear-unit-knownhost-cache | 1 + config.yaml | 25 +++++++++ hooks/nova_cc_hooks.py | 21 +++++-- hooks/nova_cc_utils.py | 61 +++++++++++++++------ unit_tests/test_actions.py | 88 ++++++++++++++++++++++++++++++ unit_tests/test_nova_cc_hooks.py | 18 +++--- 8 files changed, 296 insertions(+), 38 deletions(-) create mode 120000 actions/clear-unit-knownhost-cache diff --git a/actions.yaml b/actions.yaml index 4c218d1f..4d7e5695 100644 --- a/actions.yaml +++ b/actions.yaml @@ -1,15 +1,51 @@ openstack-upgrade: - description: Perform openstack upgrades. Config option action-managed-upgrade must be set to True. + description: | + Perform openstack upgrades. Config option action-managed-upgrade must be + set to True. pause: - description: Pause the nova-cloud-controller unit. This action will stop related services. + description: | + Pause the nova-cloud-controller unit. This action will stop related + services. resume: - descrpition: Resume the nova-cloud-controller unit. This action will start related services. + description: | + Resume the nova-cloud-controller unit. This action will start related + services. archive-data: - descrpition: Run job to archive deleted rows in database + description: Run job to archive deleted rows in database params: batch-size: type: integer default: 10000 - description: Archive old data to shadow tables + description: Archive old data to shadow tables security-checklist: - description: Validate the running configuration against the OpenStack security guides checklist + description: | + Validate the running configuration against the OpenStack security guides + checklist +clear-unit-knownhost-cache: + params: + unit: + target: string + default: "" + description: | + Clear the knownhost cache for (default) all the units, a service, or a + single unit. + . + The default is all units. If the 'target' param has an '/' in it, then it + is assumed ot be a single unit. If no '/' is present, then all the units + in a service will be refreshed. + . + e.g. target="nova-compute/4" will just clear the nova-compute/4 unit (in + the 'nova-compute' application), whereas target='nova-compute' will refresh + all of the units in the 'nova-compute' application. + . + The action triggers a refresh resolution of the known hosts for the unit, + which then populates the cache, updates the knownhosts file for the + associated service (e.g. 'nova-compute'), and, importantly, sets the + relation data for that associated service with the new knownhosts file. + This may cause a 'cloud-compute' relation changed hook on the associated + nova-compute units if the hosts have changed. + . + This action still functions even if the 'cache-known-hosts' config value is + not set; caching of hosts occurs regardless of that setting, and so this + action can be used to force an update if DNS has changed in the system, or + for a particular host (although this scenario is unlikely). diff --git a/actions/actions.py b/actions/actions.py index df80b4dc..26f90d94 100755 --- a/actions/actions.py +++ b/actions/actions.py @@ -29,6 +29,7 @@ _add_path(_root) import charmhelpers.core.hookenv as hookenv import hooks.nova_cc_utils as utils +import hooks.nova_cc_hooks as ncc_hooks def pause(args): @@ -52,12 +53,81 @@ def archive_data(args): max_rows=hookenv.action_get('batch-size'))}) +def clear_unit_knownhost_cache(args): + """Clear the knownhost cache for a unit (or all units), and then refresh + the knownhosts, and potentially set the relation data for the associated + service. + + If the target param doesn't match any unit or service, then the action does + nothing. + """ + target = hookenv.action_get('target') + hookenv.action_set({ + "Units updated": clear_knownhost_cache(target) + }) + + +def clear_knownhost_cache(target): + """Clear the known host cache for a target, rescan the affected units, + and then update the knownhosts file for the affected service(s) and set the + appropriate relation data. + + Examples of target are: + - "" = all services, all units (clear all the caches) + - "aservice" = clear all the units' caches on 'aservice' + - "aservice/4" = just clear this specific unit and update the relation on + that service. + + Note that if target doesn't match anything, then the function takes no + action and no Exception is raised. + + :param target: The target to clear. + :type target: str + :returns: a list of units that were affected. + :rtype: List[Dict[str, str]] + """ + affected_units = [] + + parts = target.split('/', 1) + target_service = parts[0] + is_unit = len(parts) > 1 + + for r_id in hookenv.relation_ids('cloud-compute'): + units = hookenv.related_units(r_id) + if not units: + continue + service = utils.remote_service_from_unit(unit=units[0]) + if target_service and service != target_service: + continue + + updated = False + for unit in units: + if is_unit and unit != target: + continue + private_address = hookenv.relation_get( + attribute='private-address', unit=unit, rid=r_id) + if private_address: + utils.clear_hostset_cache_for(private_address) + ncc_hooks.update_ssh_key(rid=r_id, unit=unit) + updated = True + affected_units.append({unit: private_address}) + + # Note that this uses the last unit in the relation; that's ok as it's + # only used to identify the service + if updated: + ncc_hooks.notify_ssh_keys_to_compute_units(rid=r_id, unit=unit) + + return affected_units + + # A dictionary of all the defined actions to callables (which take # parsed arguments). ACTIONS = { "pause": pause, "resume": resume, - "archive-data": archive_data} + "archive-data": archive_data, + "clear-unit-knownhost-cache": clear_unit_knownhost_cache, +} def main(args): diff --git a/actions/clear-unit-knownhost-cache b/actions/clear-unit-knownhost-cache new file mode 120000 index 00000000..405a394e --- /dev/null +++ b/actions/clear-unit-knownhost-cache @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/config.yaml b/config.yaml index b0b90f98..fdd24b8f 100644 --- a/config.yaml +++ b/config.yaml @@ -143,6 +143,31 @@ options: * shared-db * amqp * identity-service + cache-known-hosts: + type: boolean + default: false + description: | + EXPERIMENTAL - If true then the charm will cache host and ip lookups for + a unit when populating the knownhosts file for nova-compute service. + This is a known performance issue around maintaining the knownhosts files + for each nova-compute service, and caching is a strategy to reduce the + hook execution time when the 'cloud-compute' relation changes. If false, + then no caching is performed. Changing from true to false will NOT cause + new lookups to be performed. + . + To clear the caches and force new lookups to be performed, the action + 'clear-unit-knownhost-cache' should be used. + . + This config flag is experimental as it's very hard to determine if there + will be any DNS issues during the deployment onto different platforms. + Thus it may be preferred to keep the flag false during deployment and + then switch to true after deployment. + . + Note that the charm keeps a record of the lookups for each unit + regardless of the setting of this flag. The cache is only used if the + flag is true. + . + At a future release the default will be true. console-access-protocol: type: string default: diff --git a/hooks/nova_cc_hooks.py b/hooks/nova_cc_hooks.py index 2e537d85..f17c5eca 100755 --- a/hooks/nova_cc_hooks.py +++ b/hooks/nova_cc_hooks.py @@ -774,6 +774,9 @@ def update_ssh_key(rid=None, unit=None): private_address = rel_settings.get('private-address', None) hostname = rel_settings.get('hostname', '') + # only resolve the hosts once, so this is the memo for it + resolved_hosts = None + if migration_auth_type == 'ssh': # TODO(ajkavanagh) -- the hookenv was previous behaviour, but there # isn't a good place to put this yet; it will be moved or removed at @@ -786,8 +789,9 @@ def update_ssh_key(rid=None, unit=None): .format(rid or hookenv.relation_id(), unit or hookenv.remote_unit())) return - ncc_utils.ssh_resolve_compute_hosts( - remote_service, private_address, hostname, user=None) + resolved_hosts = ncc_utils.resolve_hosts_for(private_address, hostname) + ncc_utils.ssh_compute_add_known_hosts( + remote_service, resolved_hosts, user=None) ncc_utils.add_authorized_key_if_doesnt_exist( key, remote_service, private_address, user=None) @@ -795,8 +799,13 @@ def update_ssh_key(rid=None, unit=None): # Always try to fetch the user 'nova' key on the remote compute unit if nova_ssh_public_key: - ncc_utils.ssh_resolve_compute_hosts( - remote_service, private_address, hostname, user='nova') + # in the unlikely event the migration type wasn't ssh, we still have to + # resolve the hosts + if resolved_hosts is None: + resolved_hosts = ncc_utils.resolve_hosts_for(private_address, + hostname) + ncc_utils.ssh_compute_add_known_hosts( + remote_service, resolved_hosts, user='nova') ncc_utils.add_authorized_key_if_doesnt_exist( nova_ssh_public_key, remote_service, private_address, user='nova') @@ -899,8 +908,10 @@ def _batch_write_ssh_on_relation(rid, prefix, max_index, _iter): @hooks.hook('cloud-compute-relation-departed') def compute_departed(): + relation_data = hookenv.relation_get() ncc_utils.ssh_compute_remove( - public_key=hookenv.relation_get('ssh_public_key')) + public_key=relation_data.get('ssh_public_key')) + ncc_utils.clear_hostset_cache_for(relation_data.get('private-address')) @hooks.hook('neutron-network-service-relation-joined', diff --git a/hooks/nova_cc_utils.py b/hooks/nova_cc_utils.py index 1a808b62..945ccdd5 100644 --- a/hooks/nova_cc_utils.py +++ b/hooks/nova_cc_utils.py @@ -31,6 +31,7 @@ import charmhelpers.contrib.peerstorage as ch_peerstorage import charmhelpers.core.decorators as ch_decorators import charmhelpers.core.hookenv as hookenv import charmhelpers.core.host as ch_host +import charmhelpers.core.unitdata as unitdata import charmhelpers.fetch as ch_fetch import hooks.nova_cc_common as common @@ -1216,10 +1217,9 @@ def add_authorized_key_if_doesnt_exist(public_key, keys.write(public_key + '\n') -def ssh_resolve_compute_hosts(remote_service, - private_address, - hostname, - user=None): +def ssh_compute_add_known_hosts(remote_service, + resolved_hosts, + user=None): """Resolve all the host names for the private address, and store it against the remote service (effectively the relation) and an optional user. @@ -1230,21 +1230,18 @@ def ssh_resolve_compute_hosts(remote_service, :param remote_service: The remote service against which to store the hosts file. :type remote_service: str - :param private_address: The private address to resolve hostnames against. - :type private_address: Union[str, None] - :param hostname: An optional hostname (extracted from the relation data of - the unit), to also use to resolve hostnames for the compute unit. - :type hostname: str + :param resolved_hosts: The hosts to add + :type resolved_hosts: List[str] :param user: an optional user against which to store the resolved hostnames. :type user: Union[str, None] """ - for host in _resolve_hosts(private_address, hostname): + for host in resolved_hosts: # TODO(ajkavanagh) expensive add_known_host(host, remote_service, user) -def _resolve_hosts(private_address, hostname): +def resolve_hosts_for(private_address, hostname): """Return all of the resolved hosts for a unit Using private-address and (if availble) hostname attributes on the @@ -1265,35 +1262,65 @@ def _resolve_hosts(private_address, hostname): if private_address is None: return [] + db = unitdata.kv() + db_key = "hostset-{}".format(private_address) + cached_hostset = db.get(db_key, default=None) + if hostname: + hostname = hostname.lower() + + # only use the cached hostset if the config flag is true + if hookenv.config('cache-known-hosts') and cached_hostset is not None: + # in the unlikely event that we've already cached the host but the + # hostname is now present, add that in. + if (not ch_ip.is_ipv6(private_address) and + hostname and + hostname not in cached_hostset): + return cached_hostset + hostname + return cached_hostset + # Use a set to enforce uniqueness; order doesn't matter hosts = set() if not ch_ip.is_ipv6(private_address): if hostname: - hosts.add(hostname.lower()) + hosts.add(hostname) if not ch_utils.is_ip(private_address): hosts.append(private_address.lower()) - # TODO(ajkavanagh) expensive hosts.add(ch_utils.get_host_ip(private_address)) short = private_address.split('.')[0] - # TODO(ajkavanagh) expensive if ch_ip.ns_query(short): hosts.add(short.lower()) else: hosts.add(private_address) - # TODO(ajkavanagh) expensive hn = ch_utils.get_hostname(private_address) if hn: hosts.add(hn.lower()) short = hn.split('.')[0] - # TODO(ajkananagh) expensive if ch_ip.ns_query(short): hosts.add(short.lower()) else: hosts.add(private_address) - return list(hosts) + # Note, the cache is maintained regardless of whether the config + # 'cache-known-hosts' flag is set; the flag only affects usage and lookup. + hosts = list(hosts) + db.set(db_key, hosts) + db.flush() + + return hosts + + +def clear_hostset_cache_for(private_address): + """Clear the hostset cache for a private address that refers to a unit. + + :param private_address: the private address corresponding to the unit + :type private_address: str + """ + db = unitdata.kv() + db_key = "hostset-{}".format(private_address) + db.unset(db_key) + db.flush() def ssh_known_hosts_lines(remote_service, user=None): diff --git a/unit_tests/test_actions.py b/unit_tests/test_actions.py index d76b56f5..c41dcdc5 100644 --- a/unit_tests/test_actions.py +++ b/unit_tests/test_actions.py @@ -63,6 +63,94 @@ class ResumeTestCase(CharmTestCase): self.resume_unit_helper.assert_called_once_with('test-config') +class ClearUnitKnownhostCacheTestCase(CharmTestCase): + + @staticmethod + def _relation_get(attribute=None, unit=None, rid=None): + return { + 'aservice/1': '10.0.0.1', + 'aservice/2': '10.0.0.2', + 'aservice/3': '10.0.0.3', + 'aservice/4': '10.0.0.4', + 'bservice/1': '10.0.1.1', + 'bservice/2': '10.0.1.2', + 'bservice/3': '10.0.1.3', + }.get(unit) + + def setUp(self): + super(ClearUnitKnownhostCacheTestCase, self).setUp( + actions, [ + "charmhelpers.core.hookenv.action_get", + "charmhelpers.core.hookenv.action_set", + "charmhelpers.core.hookenv.relation_ids", + "charmhelpers.core.hookenv.related_units", + "charmhelpers.core.hookenv.relation_get", + "hooks.nova_cc_utils.clear_hostset_cache_for", + "hooks.nova_cc_hooks.update_ssh_key", + "hooks.nova_cc_hooks.notify_ssh_keys_to_compute_units", + ]) + self.relation_ids.return_value = ["r:1", "r:2"] + self.related_units.side_effect = [ + ['aservice/1', 'aservice/2', 'aservice/3', 'aservice/4'], + ['bservice/1', 'bservice/2', 'bservice/3'], + ] + self.relation_get.side_effect = \ + ClearUnitKnownhostCacheTestCase._relation_get + + def test_target_unit(self): + self.action_get.return_value = 'aservice/2' + actions.clear_unit_knownhost_cache([]) + self.action_set.assert_called_once_with({ + "Units updated": [{'aservice/2': '10.0.0.2'}] + }) + self.clear_hostset_cache_for.assert_called_once_with('10.0.0.2') + self.update_ssh_key.assert_called_once_with(rid="r:1", + unit="aservice/2") + self.notify_ssh_keys_to_compute_units.assert_called_once_with( + rid="r:1", unit="aservice/4") + + def test_target_service(self): + self.action_get.return_value = 'bservice' + actions.clear_unit_knownhost_cache([]) + self.action_set.assert_called_once_with({ + "Units updated": [ + {'bservice/1': '10.0.1.1'}, + {'bservice/2': '10.0.1.2'}, + {'bservice/3': '10.0.1.3'}, + ] + }) + self.clear_hostset_cache_for.assert_has_calls( + [mock.call('10.0.1.1'), + mock.call('10.0.1.2'), + mock.call('10.0.1.3')]) + self.update_ssh_key.assert_has_calls( + [mock.call(rid="r:2", unit="bservice/1"), + mock.call(rid="r:2", unit="bservice/2"), + mock.call(rid="r:2", unit="bservice/3")]) + self.notify_ssh_keys_to_compute_units.assert_has_calls( + [mock.call(rid="r:2", unit="bservice/3")]) + + def test_target_all(self): + self.action_get.return_value = '' + actions.clear_unit_knownhost_cache([]) + self.action_set.assert_called_once_with({ + "Units updated": [ + {'aservice/1': '10.0.0.1'}, + {'aservice/2': '10.0.0.2'}, + {'aservice/3': '10.0.0.3'}, + {'aservice/4': '10.0.0.4'}, + {'bservice/1': '10.0.1.1'}, + {'bservice/2': '10.0.1.2'}, + {'bservice/3': '10.0.1.3'}, + ] + }) + # check both services were updated; that'll imply the other calls were + # made. + self.notify_ssh_keys_to_compute_units.assert_has_calls( + [mock.call(rid="r:1", unit="aservice/4"), + mock.call(rid="r:2", unit="bservice/3")]) + + class MainTestCase(CharmTestCase): def setUp(self): diff --git a/unit_tests/test_nova_cc_hooks.py b/unit_tests/test_nova_cc_hooks.py index 07ab958f..cac2311e 100644 --- a/unit_tests/test_nova_cc_hooks.py +++ b/unit_tests/test_nova_cc_hooks.py @@ -68,7 +68,7 @@ TO_PATCH = [ 'hooks.nova_cc_utils.serial_console_settings', 'hooks.nova_cc_utils.services', 'hooks.nova_cc_utils.ssh_authorized_keys_lines', - 'hooks.nova_cc_utils.ssh_resolve_compute_hosts', + 'hooks.nova_cc_utils.ssh_compute_add_known_hosts', 'hooks.nova_cc_utils.ssh_known_hosts_lines', 'uuid', ] @@ -425,14 +425,14 @@ class NovaCCHooksTests(CharmTestCase): hooks._goal_state_achieved_for_relid('aservice', None)) @patch('hooks.nova_cc_utils.add_authorized_key_if_doesnt_exist') - @patch('hooks.nova_cc_utils.ssh_resolve_compute_hosts') + @patch('hooks.nova_cc_utils.ssh_compute_add_known_hosts') @patch('hooks.nova_cc_hooks._goal_state_achieved_for_relid') @patch('hooks.nova_cc_utils.remote_service_from_unit') def test_update_ssh_keys_and_notify_compute_units_ssh_migration( self, mock_remote_service_from_unit, mock__goal_state_achieved_for_relid, - mock_ssh_resolve_compute_hosts, + mock_ssh_compute_add_known_hosts, mock_add_authorized_key_if_doesnt_exist): mock_remote_service_from_unit.return_value = 'aservice' mock__goal_state_achieved_for_relid.return_value = True @@ -444,8 +444,8 @@ class NovaCCHooksTests(CharmTestCase): self.ssh_authorized_keys_lines.return_value = [ 'auth_0', 'auth_1', 'auth_2'] hooks.update_ssh_keys_and_notify_compute_units() - mock_ssh_resolve_compute_hosts.assert_called_once_with( - 'aservice', '10.0.0.1', '', user=None) + mock_ssh_compute_add_known_hosts.assert_called_once_with( + 'aservice', ['10.0.0.1'], user=None) mock_add_authorized_key_if_doesnt_exist.assert_called_once_with( 'fookey', 'aservice', '10.0.0.1', user=None) expected_relations = [ @@ -470,14 +470,14 @@ class NovaCCHooksTests(CharmTestCase): 'cloud-compute', None) @patch('hooks.nova_cc_utils.add_authorized_key_if_doesnt_exist') - @patch('hooks.nova_cc_utils.ssh_resolve_compute_hosts') + @patch('hooks.nova_cc_utils.ssh_compute_add_known_hosts') @patch('hooks.nova_cc_hooks._goal_state_achieved_for_relid') @patch('hooks.nova_cc_utils.remote_service_from_unit') def test_update_ssh_keys_and_notify_compute_units_nova_public_key( self, mock_remote_service_from_unit, mock__goal_state_achieved_for_relid, - mock_ssh_resolve_compute_hosts, + mock_ssh_compute_add_known_hosts, mock_add_authorized_key_if_doesnt_exist): mock_remote_service_from_unit.return_value = 'aservice' mock__goal_state_achieved_for_relid.return_value = True @@ -489,8 +489,8 @@ class NovaCCHooksTests(CharmTestCase): self.ssh_authorized_keys_lines.return_value = [ 'auth_0', 'auth_1', 'auth_2'] hooks.update_ssh_keys_and_notify_compute_units() - mock_ssh_resolve_compute_hosts.assert_called_once_with( - 'aservice', '10.0.0.1', '', user='nova') + mock_ssh_compute_add_known_hosts.assert_called_once_with( + 'aservice', ['10.0.0.1'], user='nova') mock_add_authorized_key_if_doesnt_exist.assert_called_once_with( 'fookey', 'aservice', '10.0.0.1', user='nova') expected_relations = [