Enhanced pause/resume for maintenance mode

Implemented pause/resume with some services checking.
Charm-helpers sync to bring in pause/resume support.

Change-Id: I8bcd7464a606f6fc9db374be6004d851935e749a
This commit is contained in:
Alex Kavanagh 2016-04-01 11:47:46 +00:00
parent da4f012a9b
commit 914e639b52
14 changed files with 337 additions and 38 deletions

View File

@ -2,3 +2,7 @@ git-reinstall:
description: Reinstall nova-compute from the openstack-origin-git repositories.
openstack-upgrade:
description: Perform openstack upgrades. Config option action-managed-upgrade must be set to True.
pause:
description: Pause the nova_compute unit. This action will stop nova_compute services.
resume:
descrpition: Resume the nova_compute unit. This action will start nova_compute services.

1
actions/pause Symbolic link
View File

@ -0,0 +1 @@
pause_resume.py

50
actions/pause_resume.py Executable file
View File

@ -0,0 +1,50 @@
#!/usr/bin/python
import os
import sys
sys.path.append('hooks')
from charmhelpers.core.hookenv import action_fail
from nova_compute_utils import (
pause_unit_helper,
resume_unit_helper,
register_configs,
)
def pause(args):
"""Pause the Ceilometer services.
@raises Exception should the service fail to stop.
"""
pause_unit_helper(register_configs())
def resume(args):
"""Resume the Ceilometer services.
@raises Exception should the service fail to start."""
resume_unit_helper(register_configs())
# A dictionary of all the defined actions to callables (which take
# parsed arguments).
ACTIONS = {"pause": pause, "resume": resume}
def main(args):
action_name = os.path.basename(args[0])
try:
action = ACTIONS[action_name]
except KeyError:
s = "Action {} undefined".format(action_name)
action_fail(s)
return s
else:
try:
action(args)
except Exception as e:
action_fail("Action {} failed: {}".format(action_name, str(e)))
if __name__ == "__main__":
sys.exit(main(sys.argv))

1
actions/resume Symbolic link
View File

@ -0,0 +1 @@
pause_resume.py

View File

@ -191,6 +191,15 @@ get_iface_for_address = partial(_get_for_address, key='iface')
get_netmask_for_address = partial(_get_for_address, key='netmask')
def resolve_network_cidr(ip_address):
'''
Resolves the full address cidr of an ip_address based on
configured network interfaces
'''
netmask = get_netmask_for_address(ip_address)
return str(netaddr.IPNetwork("%s/%s" % (ip_address, netmask)).cidr)
def format_ipv6_addr(address):
"""If address is IPv6, wrap it in '[]' otherwise return None.

View File

@ -126,7 +126,9 @@ class OpenStackAmuletDeployment(AmuletDeployment):
# Charms which can not use openstack-origin, ie. many subordinates
no_origin = ['cinder-ceph', 'hacluster', 'neutron-openvswitch', 'nrpe',
'openvswitch-odl', 'neutron-api-odl', 'odl-controller',
'cinder-backup']
'cinder-backup', 'nexentaedge-data',
'nexentaedge-iscsi-gw', 'nexentaedge-swift-gw',
'cinder-nexentaedge', 'nexentaedge-mgmt']
if self.openstack:
for svc in services:

View File

@ -14,16 +14,19 @@
# You should have received a copy of the GNU Lesser General Public License
# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
from charmhelpers.core.hookenv import (
config,
unit_get,
service_name,
network_get_primary_address,
)
from charmhelpers.contrib.network.ip import (
get_address_in_network,
is_address_in_network,
is_ipv6,
get_ipv6_addr,
resolve_network_cidr,
)
from charmhelpers.contrib.hahelpers.cluster import is_clustered
@ -33,16 +36,19 @@ ADMIN = 'admin'
ADDRESS_MAP = {
PUBLIC: {
'binding': 'public',
'config': 'os-public-network',
'fallback': 'public-address',
'override': 'os-public-hostname',
},
INTERNAL: {
'binding': 'internal',
'config': 'os-internal-network',
'fallback': 'private-address',
'override': 'os-internal-hostname',
},
ADMIN: {
'binding': 'admin',
'config': 'os-admin-network',
'fallback': 'private-address',
'override': 'os-admin-hostname',
@ -110,7 +116,7 @@ def resolve_address(endpoint_type=PUBLIC):
correct network. If clustered with no nets defined, return primary vip.
If not clustered, return unit address ensuring address is on configured net
split if one is configured.
split if one is configured, or a Juju 2.0 extra-binding has been used.
:param endpoint_type: Network endpoing type
"""
@ -125,23 +131,45 @@ def resolve_address(endpoint_type=PUBLIC):
net_type = ADDRESS_MAP[endpoint_type]['config']
net_addr = config(net_type)
net_fallback = ADDRESS_MAP[endpoint_type]['fallback']
binding = ADDRESS_MAP[endpoint_type]['binding']
clustered = is_clustered()
if clustered:
if not net_addr:
# If no net-splits defined, we expect a single vip
resolved_address = vips[0]
else:
if clustered and vips:
if net_addr:
for vip in vips:
if is_address_in_network(net_addr, vip):
resolved_address = vip
break
else:
# NOTE: endeavour to check vips against network space
# bindings
try:
bound_cidr = resolve_network_cidr(
network_get_primary_address(binding)
)
for vip in vips:
if is_address_in_network(bound_cidr, vip):
resolved_address = vip
break
except NotImplementedError:
# If no net-splits configured and no support for extra
# bindings/network spaces so we expect a single vip
resolved_address = vips[0]
else:
if config('prefer-ipv6'):
fallback_addr = get_ipv6_addr(exc_list=vips)[0]
else:
fallback_addr = unit_get(net_fallback)
resolved_address = get_address_in_network(net_addr, fallback_addr)
if net_addr:
resolved_address = get_address_in_network(net_addr, fallback_addr)
else:
# NOTE: only try to use extra bindings if legacy network
# configuration is not in use
try:
resolved_address = network_get_primary_address(binding)
except NotImplementedError:
resolved_address = fallback_addr
if resolved_address is None:
raise ValueError("Unable to resolve a suitable IP address based on "

View File

@ -19,7 +19,8 @@ from charmhelpers.core.hookenv import (
from charmhelpers.contrib.openstack.utils import (
get_host_ip,
get_os_version_package,
get_os_version_codename
get_os_version_codename,
is_unit_paused_set,
)
from charmhelpers.contrib.network.ovs import add_bridge
@ -487,9 +488,12 @@ class NeutronComputeContext(context.NeutronContext):
return _neutron_security_groups()
def _ensure_bridge(self):
if not service_running('openvswitch-switch'):
service_start('openvswitch-switch')
add_bridge(OVS_BRIDGE)
# NOTE(ajkavanagh) as this is called during the context usage (via
# __call__()) it should get executed when the unit comes off pause.
if not is_unit_paused_set():
if not service_running('openvswitch-switch'):
service_start('openvswitch-switch')
add_bridge(OVS_BRIDGE)
def ovs_ctxt(self):
# In addition to generating config context, ensure the OVS service

View File

@ -17,7 +17,6 @@ from charmhelpers.core.hookenv import (
status_set,
)
from charmhelpers.core.host import (
restart_on_change,
service_restart,
)
from charmhelpers.core.strutils import (
@ -36,7 +35,8 @@ from charmhelpers.contrib.openstack.utils import (
git_install_requested,
openstack_upgrade_available,
os_requires_version,
set_os_workload_status,
is_unit_paused_set,
pausable_restart_on_change as restart_on_change,
)
from charmhelpers.contrib.storage.linux.ceph import (
@ -71,9 +71,8 @@ from nova_compute_utils import (
assert_charm_supports_ipv6,
manage_ovs,
install_hugepages,
REQUIRED_INTERFACES,
check_optional_relations,
get_hugepage_number,
assess_status,
)
from charmhelpers.contrib.network.ip import (
@ -287,7 +286,8 @@ def ceph_joined():
status_set('maintenance', 'Installing apt packages')
apt_install(filter_installed_packages(['ceph-common']), fatal=True)
# Bug 1427660
service_restart('libvirt-bin')
if not is_unit_paused_set():
service_restart('libvirt-bin')
def get_ceph_request():
@ -325,8 +325,9 @@ def ceph_changed(rid=None, unit=None):
if is_request_complete(get_ceph_request()):
log('Request complete')
# Ensure that nova-compute is restarted since only now can we
# guarantee that ceph resources are ready.
service_restart('nova-compute')
# guarantee that ceph resources are ready, but only if not paused.
if not is_unit_paused_set():
service_restart('nova-compute')
else:
send_request_if_needed(get_ceph_request())
@ -428,7 +429,8 @@ def lxc_changed():
if nonce and db.get('lxd-nonce') != nonce:
db.set('lxd-nonce', nonce)
configure_lxd(user='nova')
service_restart('nova-compute')
if not is_unit_paused_set():
service_restart('nova-compute')
@hooks.hook('nova-designate-relation-changed')
@ -448,8 +450,7 @@ def main():
hooks.execute(sys.argv)
except UnregisteredHookError as e:
log('Unknown hook {} - skipping.'.format(e))
set_os_workload_status(CONFIGS, REQUIRED_INTERFACES,
charm_func=check_optional_relations)
assess_status(CONFIGS)
if __name__ == '__main__':

View File

@ -37,7 +37,6 @@ from charmhelpers.core.hookenv import (
relation_get,
DEBUG,
INFO,
status_get,
)
from charmhelpers.core.templating import render
@ -55,7 +54,10 @@ from charmhelpers.contrib.openstack.utils import (
git_pip_venv_dir,
git_yaml_value,
os_release,
set_os_workload_status,
is_unit_paused_set,
make_assess_status_func,
pause_unit,
resume_unit,
)
from charmhelpers.contrib.python.packages import (
@ -538,7 +540,9 @@ def do_openstack_upgrade(configs):
configs.set_release(openstack_release=new_os_rel)
configs.write_all()
[service_restart(s) for s in services()]
if not is_unit_paused_set():
for s in services():
service_restart(s)
def import_keystone_ca_cert():
@ -837,19 +841,84 @@ def install_hugepages():
subprocess.check_call(['update-rc.d', 'qemu-hugefsdir', 'defaults'])
def check_optional_relations(configs):
required_interfaces = {}
def get_optional_relations():
"""Return a dictionary of optional relations.
@returns {relation: relation_name}
"""
optional_interfaces = {}
if relation_ids('ceph'):
required_interfaces['storage-backend'] = ['ceph']
optional_interfaces['storage-backend'] = ['ceph']
if relation_ids('neutron-plugin'):
required_interfaces['neutron-plugin'] = ['neutron-plugin']
optional_interfaces['neutron-plugin'] = ['neutron-plugin']
if relation_ids('shared-db') or relation_ids('pgsql-db'):
required_interfaces['database'] = ['shared-db', 'pgsql-db']
optional_interfaces['database'] = ['shared-db', 'pgsql-db']
return optional_interfaces
if required_interfaces:
set_os_workload_status(configs, required_interfaces)
return status_get()
else:
return 'unknown', 'No optional relations'
def assess_status(configs):
"""Assess status of current unit
Decides what the state of the unit should be based on the current
configuration.
SIDE EFFECT: calls set_os_workload_status(...) which sets the workload
status of the unit.
Also calls status_set(...) directly if paused state isn't complete.
@param configs: a templating.OSConfigRenderer() object
@returns None - this function is executed for its side-effect
"""
assess_status_func(configs)()
def assess_status_func(configs):
"""Helper function to create the function that will assess_status() for
the unit.
Uses charmhelpers.contrib.openstack.utils.make_assess_status_func() to
create the appropriate status function and then returns it.
Used directly by assess_status() and also for pausing and resuming
the unit.
NOTE(ajkavanagh) ports are not checked due to race hazards with services
that don't behave sychronously w.r.t their service scripts. e.g.
apache2.
@param configs: a templating.OSConfigRenderer() object
@return f() -> None : a function that assesses the unit's workload status
"""
required_interfaces = REQUIRED_INTERFACES.copy()
required_interfaces.update(get_optional_relations())
return make_assess_status_func(
configs, required_interfaces,
services=services(), ports=None)
def pause_unit_helper(configs):
"""Helper function to pause a unit, and then call assess_status(...) in
effect, so that the status is correctly updated.
Uses charmhelpers.contrib.openstack.utils.pause_unit() to do the work.
@param configs: a templating.OSConfigRenderer() object
@returns None - this function is executed for its side-effect
"""
_pause_resume_helper(pause_unit, configs)
def resume_unit_helper(configs):
"""Helper function to resume a unit, and then call assess_status(...) in
effect, so that the status is correctly updated.
Uses charmhelpers.contrib.openstack.utils.resume_unit() to do the work.
@param configs: a templating.OSConfigRenderer() object
@returns None - this function is executed for its side-effect
"""
_pause_resume_helper(resume_unit, configs)
def _pause_resume_helper(f, configs):
"""Helper function that uses the make_assess_status_func(...) from
charmhelpers.contrib.openstack.utils to create an assess_status(...)
function that can be used with the pause/resume of the unit
@param f: the function to be used with the assess_status(...) function
@returns None - this function is executed for its side-effect
"""
# TODO(ajkavanagh) - ports= has been left off because of the race hazard
# that exists due to service_start()
f(assess_status_func(configs),
services=services(),
ports=None)

View File

@ -551,3 +551,19 @@ class NovaBasicDeployment(OpenStackAmuletDeployment):
sleep_time = 0
self.d.configure(juju_service, set_default)
def test_910_pause_and_resume(self):
"""The services can be paused and resumed. """
u.log.debug('Checking pause and resume actions...')
sentry_unit = self.nova_compute_sentry
assert u.status_get(sentry_unit)[0] == "active"
action_id = u.run_action(sentry_unit, "pause")
assert u.wait_on_action(action_id), "Pause action failed."
assert u.status_get(sentry_unit)[0] == "maintenance"
action_id = u.run_action(sentry_unit, "resume")
assert u.wait_on_action(action_id), "Resume action failed."
assert u.status_get(sentry_unit)[0] == "active"
u.log.debug('OK')

View File

@ -126,7 +126,9 @@ class OpenStackAmuletDeployment(AmuletDeployment):
# Charms which can not use openstack-origin, ie. many subordinates
no_origin = ['cinder-ceph', 'hacluster', 'neutron-openvswitch', 'nrpe',
'openvswitch-odl', 'neutron-api-odl', 'odl-controller',
'cinder-backup']
'cinder-backup', 'nexentaedge-data',
'nexentaedge-iscsi-gw', 'nexentaedge-swift-gw',
'cinder-nexentaedge', 'nexentaedge-mgmt']
if self.openstack:
for svc in services:

View File

@ -0,0 +1,64 @@
import mock
from mock import patch
from test_utils import CharmTestCase
with patch('nova_compute_utils.register_configs') as configs:
configs.return_value = 'test-config'
import pause_resume as actions
class PauseTestCase(CharmTestCase):
def setUp(self):
super(PauseTestCase, self).setUp(
actions, ["pause_unit_helper"])
def test_pauses_services(self):
actions.pause([])
self.pause_unit_helper.assert_called_once_with('test-config')
class ResumeTestCase(CharmTestCase):
def setUp(self):
super(ResumeTestCase, self).setUp(
actions, ["resume_unit_helper"])
def test_pauses_services(self):
actions.resume([])
self.resume_unit_helper.assert_called_once_with('test-config')
class MainTestCase(CharmTestCase):
def setUp(self):
super(MainTestCase, self).setUp(actions, ["action_fail"])
def test_invokes_action(self):
dummy_calls = []
def dummy_action(args):
dummy_calls.append(True)
with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
actions.main(["foo"])
self.assertEqual(dummy_calls, [True])
def test_unknown_action(self):
"""Unknown actions aren't a traceback."""
exit_string = actions.main(["foo"])
self.assertEqual("Action foo undefined", exit_string)
def test_failing_action(self):
"""Actions which traceback trigger action_fail() calls."""
dummy_calls = []
self.action_fail.side_effect = dummy_calls.append
def dummy_action(args):
raise ValueError("uh oh")
with mock.patch.dict(actions.ACTIONS, {"foo": dummy_action}):
actions.main(["foo"])
self.assertEqual(dummy_calls, ["Action foo failed: uh oh"])

View File

@ -711,3 +711,51 @@ class NovaComputeUtilsTests(CharmTestCase):
_call.return_value = 0
utils.install_hugepages()
self.assertEqual(self.fstab_mount.call_args_list, [])
def test_assess_status(self):
with patch.object(utils, 'assess_status_func') as asf:
callee = MagicMock()
asf.return_value = callee
utils.assess_status('test-config')
asf.assert_called_once_with('test-config')
callee.assert_called_once_with()
@patch.object(utils, 'REQUIRED_INTERFACES')
@patch.object(utils, 'services')
@patch.object(utils, 'make_assess_status_func')
@patch.object(utils, 'get_optional_relations')
def test_assess_status_func(self,
get_optional_relations,
make_assess_status_func,
services,
REQUIRED_INTERFACES):
services.return_value = 's1'
REQUIRED_INTERFACES.copy.return_value = {'test-interface': True}
get_optional_relations.return_value = {'optional': False}
test_interfaces = {
'test-interface': True,
'optional': False,
}
utils.assess_status_func('test-config')
# ports=None whilst port checks are disabled.
make_assess_status_func.assert_called_once_with(
'test-config', test_interfaces, services='s1', ports=None)
def test_pause_unit_helper(self):
with patch.object(utils, '_pause_resume_helper') as prh:
utils.pause_unit_helper('random-config')
prh.assert_called_once_with(utils.pause_unit, 'random-config')
with patch.object(utils, '_pause_resume_helper') as prh:
utils.resume_unit_helper('random-config')
prh.assert_called_once_with(utils.resume_unit, 'random-config')
@patch.object(utils, 'services')
def test_pause_resume_helper(self, services):
f = MagicMock()
services.return_value = 's1'
with patch.object(utils, 'assess_status_func') as asf:
asf.return_value = 'assessor'
utils._pause_resume_helper(f, 'some-config')
asf.assert_called_once_with('some-config')
# ports=None whilst port checks are disabled.
f.assert_called_once_with('assessor', services='s1', ports=None)