From 914e639b529d791ab7570a76b7811740cad768af Mon Sep 17 00:00:00 2001
From: Alex Kavanagh
Date: Fri, 1 Apr 2016 11:47:46 +0000
Subject: [PATCH] Enhanced pause/resume for maintenance mode

Implemented pause/resume with some services checking.
Charm-helpers sync to bring in pause/resume support.

Change-Id: I8bcd7464a606f6fc9db374be6004d851935e749a
---
 actions.yaml                                |  4 +
 actions/pause                               |  1 +
 actions/pause_resume.py                     | 50 ++++++++++
 actions/resume                              |  1 +
 hooks/charmhelpers/contrib/network/ip.py    |  9 ++
 .../contrib/openstack/amulet/deployment.py  |  4 +-
 hooks/charmhelpers/contrib/openstack/ip.py  | 42 ++++++--
 hooks/nova_compute_context.py               | 12 ++-
 hooks/nova_compute_hooks.py                 | 21 ++--
 hooks/nova_compute_utils.py                 | 99 ++++++++++++++++---
 tests/basic_deployment.py                   | 16 +++
 .../contrib/openstack/amulet/deployment.py  |  4 +-
 unit_tests/test_actions_pause_resume.py     | 64 ++++++++++++
 unit_tests/test_nova_compute_utils.py       | 48 +++++++++
 14 files changed, 337 insertions(+), 38 deletions(-)
 create mode 120000 actions/pause
 create mode 100755 actions/pause_resume.py
 create mode 120000 actions/resume
 create mode 100644 unit_tests/test_actions_pause_resume.py

diff --git a/actions.yaml b/actions.yaml
index 92f46a1b..32b5fcaf 100644
--- a/actions.yaml
+++ b/actions.yaml
@@ -2,3 +2,7 @@ git-reinstall:
   description: Reinstall nova-compute from the openstack-origin-git repositories.
 openstack-upgrade:
   description: Perform openstack upgrades. Config option action-managed-upgrade must be set to True.
+pause:
+  description: Pause the nova-compute unit. This action will stop nova-compute services.
+resume:
+  description: Resume the nova-compute unit. This action will start nova-compute services.
diff --git a/actions/pause b/actions/pause
new file mode 120000
index 00000000..bd4c0e00
--- /dev/null
+++ b/actions/pause
@@ -0,0 +1 @@
+pause_resume.py
\ No newline at end of file
diff --git a/actions/pause_resume.py b/actions/pause_resume.py
new file mode 100755
index 00000000..e3176ab8
--- /dev/null
+++ b/actions/pause_resume.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python
+
+import os
+import sys
+
+sys.path.append('hooks')
+
+from charmhelpers.core.hookenv import action_fail
+from nova_compute_utils import (
+    pause_unit_helper,
+    resume_unit_helper,
+    register_configs,
+)
+
+
+def pause(args):
+    """Pause the nova-compute services.
+    @raises Exception should the service fail to stop.
+    """
+    pause_unit_helper(register_configs())
+
+
+def resume(args):
+    """Resume the nova-compute services.
+    @raises Exception should the service fail to start."""
+    resume_unit_helper(register_configs())
+
+
+# A dictionary of all the defined actions to callables (which take
+# parsed arguments).
+ACTIONS = {"pause": pause, "resume": resume}
+
+
+def main(args):
+    action_name = os.path.basename(args[0])
+    try:
+        action = ACTIONS[action_name]
+    except KeyError:
+        s = "Action {} undefined".format(action_name)
+        action_fail(s)
+        return s
+    else:
+        try:
+            action(args)
+        except Exception as e:
+            action_fail("Action {} failed: {}".format(action_name, str(e)))
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/actions/resume b/actions/resume
new file mode 120000
index 00000000..bd4c0e00
--- /dev/null
+++ b/actions/resume
@@ -0,0 +1 @@
+pause_resume.py
\ No newline at end of file
diff --git a/hooks/charmhelpers/contrib/network/ip.py b/hooks/charmhelpers/contrib/network/ip.py
index 4efe7993..b9c79000 100644
--- a/hooks/charmhelpers/contrib/network/ip.py
+++ b/hooks/charmhelpers/contrib/network/ip.py
@@ -191,6 +191,15 @@ get_iface_for_address = partial(_get_for_address, key='iface')
 get_netmask_for_address = partial(_get_for_address, key='netmask')
 
 
+def resolve_network_cidr(ip_address):
+    '''
+    Resolves the full address cidr of an ip_address based on
+    configured network interfaces
+    '''
+    netmask = get_netmask_for_address(ip_address)
+    return str(netaddr.IPNetwork("%s/%s" % (ip_address, netmask)).cidr)
+
+
 def format_ipv6_addr(address):
     """If address is IPv6, wrap it in '[]' otherwise return None.
diff --git a/hooks/charmhelpers/contrib/openstack/amulet/deployment.py b/hooks/charmhelpers/contrib/openstack/amulet/deployment.py
index d2ede320..d21c9c78 100644
--- a/hooks/charmhelpers/contrib/openstack/amulet/deployment.py
+++ b/hooks/charmhelpers/contrib/openstack/amulet/deployment.py
@@ -126,7 +126,9 @@ class OpenStackAmuletDeployment(AmuletDeployment):
         # Charms which can not use openstack-origin, ie. many subordinates
         no_origin = ['cinder-ceph', 'hacluster', 'neutron-openvswitch', 'nrpe',
                      'openvswitch-odl', 'neutron-api-odl', 'odl-controller',
-                     'cinder-backup']
+                     'cinder-backup', 'nexentaedge-data',
+                     'nexentaedge-iscsi-gw', 'nexentaedge-swift-gw',
+                     'cinder-nexentaedge', 'nexentaedge-mgmt']
 
         if self.openstack:
             for svc in services:
diff --git a/hooks/charmhelpers/contrib/openstack/ip.py b/hooks/charmhelpers/contrib/openstack/ip.py
index 3dca6dc1..532a1dc1 100644
--- a/hooks/charmhelpers/contrib/openstack/ip.py
+++ b/hooks/charmhelpers/contrib/openstack/ip.py
@@ -14,16 +14,19 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with charm-helpers.  If not, see <http://www.gnu.org/licenses/>.
 
+
 from charmhelpers.core.hookenv import (
     config,
     unit_get,
     service_name,
+    network_get_primary_address,
 )
 from charmhelpers.contrib.network.ip import (
     get_address_in_network,
     is_address_in_network,
     is_ipv6,
     get_ipv6_addr,
+    resolve_network_cidr,
 )
 from charmhelpers.contrib.hahelpers.cluster import is_clustered
 
@@ -33,16 +36,19 @@ ADMIN = 'admin'
 
 ADDRESS_MAP = {
     PUBLIC: {
+        'binding': 'public',
         'config': 'os-public-network',
         'fallback': 'public-address',
         'override': 'os-public-hostname',
     },
     INTERNAL: {
+        'binding': 'internal',
         'config': 'os-internal-network',
         'fallback': 'private-address',
         'override': 'os-internal-hostname',
     },
     ADMIN: {
+        'binding': 'admin',
         'config': 'os-admin-network',
         'fallback': 'private-address',
         'override': 'os-admin-hostname',
     },
 }
@@ -110,7 +116,7 @@ def resolve_address(endpoint_type=PUBLIC):
     correct network.  If clustered with no nets defined, return primary vip.
 
     If not clustered, return unit address ensuring address is on configured net
-    split if one is configured.
+    split if one is configured, or a Juju 2.0 extra-binding has been used.
     :param endpoint_type: Network endpoing type
     """
@@ -125,23 +131,45 @@ def resolve_address(endpoint_type=PUBLIC):
     net_type = ADDRESS_MAP[endpoint_type]['config']
     net_addr = config(net_type)
     net_fallback = ADDRESS_MAP[endpoint_type]['fallback']
+    binding = ADDRESS_MAP[endpoint_type]['binding']
     clustered = is_clustered()
-    if clustered:
-        if not net_addr:
-            # If no net-splits defined, we expect a single vip
-            resolved_address = vips[0]
-        else:
+
+    if clustered and vips:
+        if net_addr:
             for vip in vips:
                 if is_address_in_network(net_addr, vip):
                     resolved_address = vip
                     break
+        else:
+            # NOTE: endeavour to check vips against network space
+            #       bindings
+            try:
+                bound_cidr = resolve_network_cidr(
+                    network_get_primary_address(binding)
+                )
+                for vip in vips:
+                    if is_address_in_network(bound_cidr, vip):
+                        resolved_address = vip
+                        break
+            except NotImplementedError:
+                # If no net-splits configured and no support for extra
+                # bindings/network spaces so we expect a single vip
+                resolved_address = vips[0]
     else:
         if config('prefer-ipv6'):
             fallback_addr = get_ipv6_addr(exc_list=vips)[0]
         else:
             fallback_addr = unit_get(net_fallback)
-        resolved_address = get_address_in_network(net_addr, fallback_addr)
+        if net_addr:
+            resolved_address = get_address_in_network(net_addr, fallback_addr)
+        else:
+            # NOTE: only try to use extra bindings if legacy network
+            #       configuration is not in use
+            try:
+                resolved_address = network_get_primary_address(binding)
+            except NotImplementedError:
+                resolved_address = fallback_addr
 
     if resolved_address is None:
         raise ValueError("Unable to resolve a suitable IP address based on "
diff --git a/hooks/nova_compute_context.py b/hooks/nova_compute_context.py
index ba6f0945..7a261e76 100644
--- a/hooks/nova_compute_context.py
+++ b/hooks/nova_compute_context.py
@@ -19,7 +19,8 @@ from charmhelpers.core.hookenv import (
 from charmhelpers.contrib.openstack.utils import (
     get_host_ip,
     get_os_version_package,
-    get_os_version_codename
+    get_os_version_codename,
+    is_unit_paused_set,
 )
 from charmhelpers.contrib.network.ovs import add_bridge
 
@@ -487,9 +488,12 @@ class NeutronComputeContext(context.NeutronContext):
         return _neutron_security_groups()
 
     def _ensure_bridge(self):
-        if not service_running('openvswitch-switch'):
-            service_start('openvswitch-switch')
-        add_bridge(OVS_BRIDGE)
+        # NOTE(ajkavanagh) as this is called during the context usage (via
+        # __call__()) it should get executed when the unit comes off pause.
+        if not is_unit_paused_set():
+            if not service_running('openvswitch-switch'):
+                service_start('openvswitch-switch')
+            add_bridge(OVS_BRIDGE)
 
     def ovs_ctxt(self):
         # In addition to generating config context, ensure the OVS service
diff --git a/hooks/nova_compute_hooks.py b/hooks/nova_compute_hooks.py
index a725cc2a..78b655aa 100755
--- a/hooks/nova_compute_hooks.py
+++ b/hooks/nova_compute_hooks.py
@@ -17,7 +17,6 @@ from charmhelpers.core.hookenv import (
     status_set,
 )
 from charmhelpers.core.host import (
-    restart_on_change,
     service_restart,
 )
 from charmhelpers.core.strutils import (
@@ -36,7 +35,8 @@ from charmhelpers.contrib.openstack.utils import (
     git_install_requested,
     openstack_upgrade_available,
     os_requires_version,
-    set_os_workload_status,
+    is_unit_paused_set,
+    pausable_restart_on_change as restart_on_change,
 )
 
 from charmhelpers.contrib.storage.linux.ceph import (
@@ -71,9 +71,8 @@ from nova_compute_utils import (
     assert_charm_supports_ipv6,
     manage_ovs,
     install_hugepages,
-    REQUIRED_INTERFACES,
-    check_optional_relations,
     get_hugepage_number,
+    assess_status,
 )
 
 from charmhelpers.contrib.network.ip import (
@@ -287,7 +286,8 @@ def ceph_joined():
     status_set('maintenance', 'Installing apt packages')
     apt_install(filter_installed_packages(['ceph-common']), fatal=True)
     # Bug 1427660
-    service_restart('libvirt-bin')
+    if not is_unit_paused_set():
+        service_restart('libvirt-bin')
 
 
 def get_ceph_request():
@@ -325,8 +325,9 @@ def ceph_changed(rid=None, unit=None):
     if is_request_complete(get_ceph_request()):
         log('Request complete')
         # Ensure that nova-compute is restarted since only now can we
-        # guarantee that ceph resources are ready.
-        service_restart('nova-compute')
+        # guarantee that ceph resources are ready, but only if not paused.
+        if not is_unit_paused_set():
+            service_restart('nova-compute')
     else:
         send_request_if_needed(get_ceph_request())
 
@@ -428,7 +429,8 @@ def lxc_changed():
     if nonce and db.get('lxd-nonce') != nonce:
         db.set('lxd-nonce', nonce)
         configure_lxd(user='nova')
-        service_restart('nova-compute')
+        if not is_unit_paused_set():
+            service_restart('nova-compute')
 
 
 @hooks.hook('nova-designate-relation-changed')
@@ -448,8 +450,7 @@ def main():
         hooks.execute(sys.argv)
     except UnregisteredHookError as e:
         log('Unknown hook {} - skipping.'.format(e))
-    set_os_workload_status(CONFIGS, REQUIRED_INTERFACES,
-                           charm_func=check_optional_relations)
+    assess_status(CONFIGS)
 
 
 if __name__ == '__main__':
diff --git a/hooks/nova_compute_utils.py b/hooks/nova_compute_utils.py
index 4046d8e7..585ed74d 100644
--- a/hooks/nova_compute_utils.py
+++ b/hooks/nova_compute_utils.py
@@ -37,7 +37,6 @@ from charmhelpers.core.hookenv import (
     relation_get,
     DEBUG,
     INFO,
-    status_get,
 )
 
 from charmhelpers.core.templating import render
@@ -55,7 +54,10 @@ from charmhelpers.contrib.openstack.utils import (
     git_pip_venv_dir,
     git_yaml_value,
     os_release,
-    set_os_workload_status,
+    is_unit_paused_set,
+    make_assess_status_func,
+    pause_unit,
+    resume_unit,
 )
 
 from charmhelpers.contrib.python.packages import (
@@ -538,7 +540,9 @@ def do_openstack_upgrade(configs):
     configs.set_release(openstack_release=new_os_rel)
     configs.write_all()
 
-    [service_restart(s) for s in services()]
+    if not is_unit_paused_set():
+        for s in services():
+            service_restart(s)
 
 
 def import_keystone_ca_cert():
@@ -837,19 +841,84 @@ def install_hugepages():
         subprocess.check_call(['update-rc.d', 'qemu-hugefsdir', 'defaults'])
 
 
-def check_optional_relations(configs):
-    required_interfaces = {}
+def get_optional_relations():
+    """Return a dictionary of optional relations.
+
+    @returns {relation: relation_name}
+    """
+    optional_interfaces = {}
     if relation_ids('ceph'):
-        required_interfaces['storage-backend'] = ['ceph']
-
+        optional_interfaces['storage-backend'] = ['ceph']
     if relation_ids('neutron-plugin'):
-        required_interfaces['neutron-plugin'] = ['neutron-plugin']
-
+        optional_interfaces['neutron-plugin'] = ['neutron-plugin']
     if relation_ids('shared-db') or relation_ids('pgsql-db'):
-        required_interfaces['database'] = ['shared-db', 'pgsql-db']
+        optional_interfaces['database'] = ['shared-db', 'pgsql-db']
+    return optional_interfaces
 
-    if required_interfaces:
-        set_os_workload_status(configs, required_interfaces)
-        return status_get()
-    else:
-        return 'unknown', 'No optional relations'
+
+def assess_status(configs):
+    """Assess status of current unit
+    Decides what the state of the unit should be based on the current
+    configuration.
+    SIDE EFFECT: calls set_os_workload_status(...) which sets the workload
+    status of the unit.
+    Also calls status_set(...) directly if paused state isn't complete.
+    @param configs: a templating.OSConfigRenderer() object
+    @returns None - this function is executed for its side-effect
+    """
+    assess_status_func(configs)()
+
+
+def assess_status_func(configs):
+    """Helper function to create the function that will assess_status() for
+    the unit.
+    Uses charmhelpers.contrib.openstack.utils.make_assess_status_func() to
+    create the appropriate status function and then returns it.
+    Used directly by assess_status() and also for pausing and resuming
+    the unit.
+
+    NOTE(ajkavanagh) ports are not checked due to race hazards with services
+    that don't behave synchronously w.r.t their service scripts. e.g.
+    apache2.
+    @param configs: a templating.OSConfigRenderer() object
+    @return f() -> None : a function that assesses the unit's workload status
+    """
+    required_interfaces = REQUIRED_INTERFACES.copy()
+    required_interfaces.update(get_optional_relations())
+    return make_assess_status_func(
+        configs, required_interfaces,
+        services=services(), ports=None)
+
+
+def pause_unit_helper(configs):
+    """Helper function to pause a unit, and then call assess_status(...) in
+    effect, so that the status is correctly updated.
+    Uses charmhelpers.contrib.openstack.utils.pause_unit() to do the work.
+    @param configs: a templating.OSConfigRenderer() object
+    @returns None - this function is executed for its side-effect
+    """
+    _pause_resume_helper(pause_unit, configs)
+
+
+def resume_unit_helper(configs):
+    """Helper function to resume a unit, and then call assess_status(...) in
+    effect, so that the status is correctly updated.
+    Uses charmhelpers.contrib.openstack.utils.resume_unit() to do the work.
+    @param configs: a templating.OSConfigRenderer() object
+    @returns None - this function is executed for its side-effect
+    """
+    _pause_resume_helper(resume_unit, configs)
+
+
+def _pause_resume_helper(f, configs):
+    """Helper function that uses the make_assess_status_func(...) from
+    charmhelpers.contrib.openstack.utils to create an assess_status(...)
+    function that can be used with the pause/resume of the unit
+    @param f: the function to be used with the assess_status(...) function
+    @returns None - this function is executed for its side-effect
+    """
+    # TODO(ajkavanagh) - ports= has been left off because of the race hazard
+    # that exists due to service_start()
+    f(assess_status_func(configs),
+      services=services(),
+      ports=None)
diff --git a/tests/basic_deployment.py b/tests/basic_deployment.py
index d72f3123..700fe5b3 100644
--- a/tests/basic_deployment.py
+++ b/tests/basic_deployment.py
@@ -551,3 +551,19 @@ class NovaBasicDeployment(OpenStackAmuletDeployment):
             sleep_time = 0
 
         self.d.configure(juju_service, set_default)
+
+    def test_910_pause_and_resume(self):
+        """The services can be paused and resumed. """
+        u.log.debug('Checking pause and resume actions...')
+        sentry_unit = self.nova_compute_sentry
+
+        assert u.status_get(sentry_unit)[0] == "active"
+
+        action_id = u.run_action(sentry_unit, "pause")
+        assert u.wait_on_action(action_id), "Pause action failed."
+        assert u.status_get(sentry_unit)[0] == "maintenance"
+
+        action_id = u.run_action(sentry_unit, "resume")
+        assert u.wait_on_action(action_id), "Resume action failed."
+        assert u.status_get(sentry_unit)[0] == "active"
+        u.log.debug('OK')
diff --git a/tests/charmhelpers/contrib/openstack/amulet/deployment.py b/tests/charmhelpers/contrib/openstack/amulet/deployment.py
index d2ede320..d21c9c78 100644
--- a/tests/charmhelpers/contrib/openstack/amulet/deployment.py
+++ b/tests/charmhelpers/contrib/openstack/amulet/deployment.py
@@ -126,7 +126,9 @@ class OpenStackAmuletDeployment(AmuletDeployment):
         # Charms which can not use openstack-origin, ie. many subordinates
         no_origin = ['cinder-ceph', 'hacluster', 'neutron-openvswitch', 'nrpe',
                      'openvswitch-odl', 'neutron-api-odl', 'odl-controller',
-                     'cinder-backup']
+                     'cinder-backup', 'nexentaedge-data',
+                     'nexentaedge-iscsi-gw', 'nexentaedge-swift-gw',
+                     'cinder-nexentaedge', 'nexentaedge-mgmt']
 
         if self.openstack:
             for svc in services:
diff --git a/unit_tests/test_actions_pause_resume.py b/unit_tests/test_actions_pause_resume.py
new file mode 100644
index 00000000..b8e44c06
--- /dev/null
+++ b/unit_tests/test_actions_pause_resume.py
@@ -0,0 +1,64 @@
+import mock
+from mock import patch
+
+from test_utils import CharmTestCase
+
+with patch('nova_compute_utils.register_configs') as configs:
+    configs.return_value = 'test-config'
+    import pause_resume as actions
+
+
+class PauseTestCase(CharmTestCase):
+
+    def setUp(self):
+        super(PauseTestCase, self).setUp(
+            actions, ["pause_unit_helper"])
+
+    def test_pauses_services(self):
+        actions.pause([])
+        self.pause_unit_helper.assert_called_once_with('test-config')
+
+
+class ResumeTestCase(CharmTestCase):
+
+    def setUp(self):
+        super(ResumeTestCase, self).setUp(
+            actions, ["resume_unit_helper"])
+
+    def test_resumes_services(self):
+        actions.resume([])
+        self.resume_unit_helper.assert_called_once_with('test-config')
+
+
+class MainTestCase(CharmTestCase):
+
+    def setUp(self):
+        super(MainTestCase, self).setUp(actions, ["action_fail"])
+
+    def test_invokes_action(self):
+        dummy_calls = []
+
+        def dummy_action(args):
+            dummy_calls.append(True)
+
+        with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
+            actions.main(["foo"])
+        self.assertEqual(dummy_calls, [True])
+
+    def test_unknown_action(self):
+        """Unknown actions aren't a traceback."""
+        exit_string = actions.main(["foo"])
+        self.assertEqual("Action foo undefined", exit_string)
+
+    def test_failing_action(self):
+        """Actions which traceback trigger action_fail() calls."""
+        dummy_calls = []
+
+        self.action_fail.side_effect = dummy_calls.append
+
+        def dummy_action(args):
+            raise ValueError("uh oh")
+
+        with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
+            actions.main(["foo"])
+        self.assertEqual(dummy_calls, ["Action foo failed: uh oh"])
diff --git a/unit_tests/test_nova_compute_utils.py b/unit_tests/test_nova_compute_utils.py
index 925cf414..a48f2938 100644
--- a/unit_tests/test_nova_compute_utils.py
+++ b/unit_tests/test_nova_compute_utils.py
@@ -711,3 +711,51 @@ class NovaComputeUtilsTests(CharmTestCase):
         _call.return_value = 0
         utils.install_hugepages()
         self.assertEqual(self.fstab_mount.call_args_list, [])
+
+    def test_assess_status(self):
+        with patch.object(utils, 'assess_status_func') as asf:
+            callee = MagicMock()
+            asf.return_value = callee
+            utils.assess_status('test-config')
+            asf.assert_called_once_with('test-config')
+            callee.assert_called_once_with()
+
+    @patch.object(utils, 'REQUIRED_INTERFACES')
+    @patch.object(utils, 'services')
+    @patch.object(utils, 'make_assess_status_func')
+    @patch.object(utils, 'get_optional_relations')
+    def test_assess_status_func(self,
+                                get_optional_relations,
+                                make_assess_status_func,
+                                services,
+                                REQUIRED_INTERFACES):
+        services.return_value = 's1'
+        REQUIRED_INTERFACES.copy.return_value = {'test-interface': True}
+        get_optional_relations.return_value = {'optional': False}
+        test_interfaces = {
+            'test-interface': True,
+            'optional': False,
+        }
+        utils.assess_status_func('test-config')
+        # ports=None whilst port checks are disabled.
+        make_assess_status_func.assert_called_once_with(
+            'test-config', test_interfaces, services='s1', ports=None)
+
+    def test_pause_unit_helper(self):
+        with patch.object(utils, '_pause_resume_helper') as prh:
+            utils.pause_unit_helper('random-config')
+            prh.assert_called_once_with(utils.pause_unit, 'random-config')
+        with patch.object(utils, '_pause_resume_helper') as prh:
+            utils.resume_unit_helper('random-config')
+            prh.assert_called_once_with(utils.resume_unit, 'random-config')
+
+    @patch.object(utils, 'services')
+    def test_pause_resume_helper(self, services):
+        f = MagicMock()
+        services.return_value = 's1'
+        with patch.object(utils, 'assess_status_func') as asf:
+            asf.return_value = 'assessor'
+            utils._pause_resume_helper(f, 'some-config')
+            asf.assert_called_once_with('some-config')
+            # ports=None whilst port checks are disabled.
+            f.assert_called_once_with('assessor', services='s1', ports=None)
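
Usage note (not part of the patch itself): the intended behaviour is that the
"pause" action stops the nova-compute services and leaves the unit with a
"maintenance" workload status, and "resume" restarts the services and returns
it to "active". A minimal sketch of driving the actions, mirroring
test_910_pause_and_resume above and assuming an amulet OpenStackAmuletUtils
instance `u` and the nova-compute sentry unit from tests/basic_deployment.py
(the helper name below is illustrative only):

    def check_pause_and_resume(u, sentry_unit):
        # The unit starts out "active" with its services running.
        assert u.status_get(sentry_unit)[0] == "active"

        # "pause" stops the services and sets the workload status to
        # "maintenance".
        action_id = u.run_action(sentry_unit, "pause")
        assert u.wait_on_action(action_id), "Pause action failed."
        assert u.status_get(sentry_unit)[0] == "maintenance"

        # "resume" starts the services again and restores "active".
        action_id = u.run_action(sentry_unit, "resume")
        assert u.wait_on_action(action_id), "Resume action failed."
        assert u.status_get(sentry_unit)[0] == "active"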