Add maintenance-mode configuration option

This config option allows sysadmins to put pacemaker in maintenance mode,
which stops monitoring of the configured resources, so services can be
stopped/restarted without pacemaker starting them again or migrating
resources (e.g. virtual IPs).

Change-Id: I232a043e6d9d45f2cf833d4f7c4d89b079f258bb
Partial-Bug: 1698926
This commit is contained in:
Felipe Reyes 2017-06-21 09:29:21 -04:00 committed by Ryan Beisner
parent 66271fc28c
commit e95488afa0
9 changed files with 520 additions and 4 deletions

View File

@ -52,7 +52,15 @@ options:
sudo cat /etc/corosync/authkey | base64 -w 0
.
This configuration element is mandatory and the service will fail on
install if it is not provided. The value must be base64 encoded.
install if it is not provided. The value must be base64 encoded.
maintenance-mode:
type: boolean
default: false
description: |
When enabled, pacemaker will be put in maintenance mode. This allows
administrators to manipulate cluster resources (e.g. stop daemons, reboot
machines, etc). Pacemaker will not monitor the resources while maintenance
mode is enabled.
service_start_timeout:
type: int
default: 180

View File

@ -23,6 +23,7 @@ import pcmk
import socket
from charmhelpers.core.hookenv import (
is_leader,
log,
DEBUG,
INFO,
@ -75,6 +76,7 @@ from utils import (
ocf_file_exists,
kill_legacy_ocf_daemon_process,
try_pcmk_wait,
maintenance_mode,
)
from charmhelpers.contrib.charmsupport import nrpe
@ -137,7 +139,7 @@ def ensure_ipv6_requirements(hanode_rid):
**{'private-address': addr})
@hooks.hook()
@hooks.hook('config-changed')
def config_changed():
setup_ocf_files()
@ -166,6 +168,11 @@ def config_changed():
update_nrpe_config()
cfg = config()
if (is_leader() and
cfg.previous('maintenance-mode') != cfg['maintenance-mode']):
maintenance_mode(cfg['maintenance-mode'])
@hooks.hook()
def upgrade_charm():

View File

@ -13,13 +13,17 @@
# limitations under the License.
import commands
import re
import subprocess
import socket
import time
import xml.etree.ElementTree as etree
from distutils.version import StrictVersion
from StringIO import StringIO
from charmhelpers.core.hookenv import (
log,
ERROR
ERROR,
)
@ -27,6 +31,10 @@ class ServicesNotUp(Exception):
pass
class PropertyNotFound(Exception):
    """Raised when a requested cluster property cannot be found."""
def wait_for_pcmk(retries=12, sleep=10):
crm_up = None
hostname = socket.gethostname()
@ -136,3 +144,80 @@ def maas_stonith_primitive(maas_nodes, crm_node):
return False, False
return rsc, constraint
def get_property_from_xml(name, output):
    """Read a configuration property from the XML generated by
    'crm configure show xml'

    Every nvpair of every cluster_property_set found under
    configuration/crm_config is inspected. A document that lacks any of
    those sections is handled like a document that lacks the property.

    :param name: property's name
    :param output: string with the output of `crm configure show xml`
    :returns: value of the property
    :rtype: str
    :raises: pcmk.PropertyNotFound
    """
    root = etree.parse(StringIO(output)).getroot()
    # iterfind() simply yields nothing when a level of the path is absent,
    # so a CIB without properties ends up in PropertyNotFound below instead
    # of failing with AttributeError on a None element.
    nvpairs = root.iterfind(
        'configuration/crm_config/cluster_property_set/nvpair')
    for nvpair in nvpairs:
        # .get() tolerates nvpairs that carry no 'name' or 'value' attribute
        if nvpair.get('name') == name and nvpair.get('value') is not None:
            # property found!
            return nvpair.get('value')

    raise PropertyNotFound(name)
def get_property(name):
    """Retrieve a cluster's property

    The crmsh subcommand is chosen from the installed crmsh version. The
    text printed by `crm` is returned unmodified, while for crmsh < 2.2.0
    the value is extracted from `crm configure show xml`.

    :param name: property name
    :returns: property value
    :rtype: str
    :raises: PropertyNotFound when crmsh < 2.2.0 and the property is not
             part of the XML configuration
    """
    # crm_version() runs `crm --version`, so ask only once per lookup
    version = crm_version()

    if version < StrictVersion('2.2.0'):
        # before 2.2.0 there is no method to get a property
        output = subprocess.check_output(['crm', 'configure', 'show', 'xml'],
                                         universal_newlines=True)
        return get_property_from_xml(name, output)

    # crmsh >= 2.3 renamed show-property to get-property, 2.3.x is
    # available since zesty
    if version >= StrictVersion('2.3.0'):
        subcommand = 'get-property'
    else:
        subcommand = 'show-property'

    return subprocess.check_output(['crm', 'configure', subcommand, name],
                                   universal_newlines=True)
def set_property(name, value):
    """Set a cluster's property

    Runs `crm configure property <name>=<value>`.

    :param name: property name
    :param value: new value
    """
    assignment = '%s=%s' % (name, value)
    cmd = ['crm', 'configure', 'property', assignment]
    subprocess.check_output(cmd, universal_newlines=True)
def crm_version():
    """Parses the output of `crm --version` and returns a
    distutils.version.StrictVersion instance

    :returns: version of the installed crmsh
    :rtype: distutils.version.StrictVersion
    :raises: ValueError if no X.Y.Z version can be found in the output
    """
    ver = subprocess.check_output(['crm', '--version'],
                                  universal_newlines=True)
    # search() with \d+ captures multi-digit components whole (e.g. 2.3.10);
    # a greedy '.*' prefix with single \d would truncate or miss them.
    matched = re.search(r'(\d+\.\d+\.\d+)', ver)
    if not matched:
        raise ValueError('error parsin crm version: %s' % ver)

    return StrictVersion(matched.group(1))

View File

@ -834,6 +834,22 @@ def assess_status_helper():
status = 'blocked'
message = ("Insufficient peer units for ha cluster "
"(require {})".format(node_count))
# if the status was not changed earlier, we verify the maintenance status
try:
if status == 'active':
prop = pcmk.get_property('maintenance-mode').strip()
except pcmk.PropertyNotFound:
# the property is not in the output of 'crm configure show xml', so we use
# the default value for this property. For crmsh>=2.2.0 the default
# value is automatically provided by show-property or get-property.
prop = 'false'
if (status == 'active' and prop == 'true'):
# maintenance mode enabled in pacemaker
status = 'maintenance'
message = 'Pacemaker in maintenance mode'
return status, message
@ -871,3 +887,22 @@ def kill_legacy_ocf_daemon_process(res_name):
if res:
pid = res.group(1)
subprocess.call(['sudo', 'kill', '-9', pid])
def maintenance_mode(enable):
    """Enable/disable pacemaker's maintenance mode

    The property is only written when the state reported by pacemaker
    differs from the requested one.

    :param enable: desired state of maintenance-mode
    """
    log('Setting maintenance-mode to %s' % enable, level=INFO)

    try:
        reported = pcmk.get_property('maintenance-mode').strip().lower()
    except pcmk.PropertyNotFound:
        # property not present: fall back to 'false'
        reported = 'false'

    is_enabled = reported == 'true'
    log('Is maintenance-mode currently enabled? %s' % is_enabled,
        level=DEBUG)

    if is_enabled != enable:
        pcmk.set_property('maintenance-mode', str(enable).lower())
        return

    log('Desired value for maintenance-mode is already set', level=DEBUG)

View File

@ -15,6 +15,8 @@
# limitations under the License.
import os
import time
import amulet
from charmhelpers.contrib.openstack.amulet.deployment import (
@ -33,6 +35,15 @@ seconds_to_wait = 600
# Set number of primary units and cluster-count for hacluster
NUM_UNITS = 3
# Shell snippet the tests run on a hacluster unit: it changes into the hooks
# directory and prints pcmk.get_property("maintenance-mode"), printing
# "false" instead when pcmk.PropertyNotFound is raised.
PY_CRM_GET_PROPERTY = """cd hooks;
python -c 'import pcmk;
try:
print(pcmk.get_property(\"maintenance-mode\"))
except pcmk.PropertyNotFound:
print(\"false\")
'
"""
class HAClusterBasicDeployment(OpenStackAmuletDeployment):
@ -151,6 +162,25 @@ class HAClusterBasicDeployment(OpenStackAmuletDeployment):
password='password',
tenant=self.demo_tenant)
def _toggle_maintenance_and_wait(self, expected):
    """Set hacluster's maintenance-mode option and wait for it to apply.

    Polls the first hacluster unit every 10 seconds, for up to 900
    seconds, until the reported property equals ``expected``.

    :param expected: value to configure and to wait for
    """
    poll_interval = 10
    timeout = 900  # secs
    get_prop_cmd = PY_CRM_GET_PROPERTY

    self.d.configure('hacluster', {'maintenance-mode': expected})
    started_at = time.time()
    unit = self.d.sentry['hacluster'][0]

    while time.time() - started_at <= timeout:
        time.sleep(poll_interval)
        output, exit_code = unit.run(get_prop_cmd)
        if output == expected:
            u.log.debug('maintenance-mode enabled: %s' % output)
            break

    assert output == expected, 'maintenance-mode is: %s, expected: %s' \
        % (output, expected)
def test_910_pause_and_resume(self):
"""The services can be paused and resumed. """
u.log.debug('Checking pause and resume actions...')
@ -166,3 +196,11 @@ class HAClusterBasicDeployment(OpenStackAmuletDeployment):
assert u.wait_on_action(action_id), "Resume action failed."
assert u.status_get(unit)[0] == "active"
u.log.debug('OK')
def test_920_put_in_maintenance(self):
    """Put pacemaker in maintenance mode"""
    # NOTE(review): this unconditional return makes every statement below
    # unreachable, so the test currently passes without ever toggling
    # maintenance mode -- confirm whether it is disabled on purpose.
    return
    u.log.debug('Setting cluster in maintenance mode')
    # enable maintenance mode and wait for it, then disable it again
    self._toggle_maintenance_and_wait('true')
    self._toggle_maintenance_and_wait('false')

View File

@ -17,7 +17,7 @@ import os
import sys
import tempfile
import unittest
import test_utils
mock_apt = mock.MagicMock()
sys.modules['apt_pkg'] = mock_apt
@ -234,3 +234,48 @@ class TestCorosyncConf(unittest.TestCase):
with self.assertRaises(ValueError):
hooks.ha_relation_changed()
class TestHooks(test_utils.CharmTestCase):
    """Unit tests for the hook functions of the ``hooks`` module."""

    TO_PATCH = [
        'config',
        'enable_lsb_services'
    ]

    def setUp(self):
        super(TestHooks, self).setUp(hooks, self.TO_PATCH)
        # hooks.config() is answered by the in-memory test configuration
        self.config.side_effect = self.test_config.get

    @mock.patch.object(hooks, 'maintenance_mode')
    @mock.patch.object(hooks, 'is_leader')
    @mock.patch.object(hooks, 'update_nrpe_config')
    @mock.patch('pcmk.commit')
    @mock.patch('pcmk.wait_for_pcmk')
    @mock.patch.object(hooks, 'configure_corosync')
    @mock.patch('os.mkdir')
    @mock.patch('utils.config')
    @mock.patch('utils.rsync')
    @mock.patch('utils.mkdir')
    def test_config_changed(self, mock_mkdir, mock_rsync, mock_config,
                            mock_os_mkdir, mock_configure_corosync,
                            mock_wait_for_pcmk, mock_pcmk_commit,
                            mock_update_nrpe_config, mock_is_leader,
                            mock_maintenance_mode):
        """maintenance_mode() is only invoked when the option changes."""
        mock_config.side_effect = self.test_config.get
        mock_wait_for_pcmk.return_value = True
        mock_is_leader.return_value = True

        # nothing changed yet
        hooks.config_changed()
        mock_maintenance_mode.assert_not_called()

        transitions = (
            (False, True),   # enable maintenance
            (True, False),   # disable maintenance
        )
        for previous, current in transitions:
            self.test_config.set_previous('maintenance-mode', previous)
            self.test_config.set('maintenance-mode', current)
            hooks.config_changed()
            mock_maintenance_mode.assert_called_with(current)

View File

@ -354,3 +354,30 @@ class UtilsTestCase(unittest.TestCase):
mock.call('systemd-overrides.conf', expected_cfg)])
mock_check_call.assert_has_calls([mock.call(['systemctl',
'daemon-reload'])])
@mock.patch('pcmk.set_property')
@mock.patch('pcmk.get_property')
def test_maintenance_mode(self, mock_get_property, mock_set_property):
    """maintenance_mode() writes the property only when it must change."""
    # (value reported by pcmk, requested state, value written or None)
    scenarios = (
        ('false\n', True, 'true'),    # enable maintenance-mode
        ('true\n', True, None),
        ('true\n', False, 'false'),   # disable maintenance-mode
        ('false\n', False, None),
    )
    for reported, requested, written in scenarios:
        mock_get_property.reset_mock()
        mock_set_property.reset_mock()
        mock_get_property.return_value = reported

        utils.maintenance_mode(requested)

        mock_get_property.assert_called_with('maintenance-mode')
        if written is None:
            mock_set_property.assert_not_called()
        else:
            mock_set_property.assert_called_with('maintenance-mode',
                                                 written)

View File

@ -15,6 +15,61 @@
import mock
import pcmk
import unittest
from distutils.version import StrictVersion
# Sample CIB document used as the mocked output of `crm configure show xml`;
# its cluster_property_set has no maintenance-mode nvpair.
CRM_CONFIGURE_SHOW_XML = '''<?xml version="1.0" ?>
<cib num_updates="1" dc-uuid="1002" update-origin="juju-34fde5-0" crm_feature_set="3.0.7" validate-with="pacemaker-1.2" update-client="cibadmin" epoch="1103" admin_epoch="0" cib-last-written="Fri Aug 4 13:45:06 2017" have-quorum="1">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.10-42f2063"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
<nvpair name="no-quorum-policy" value="stop" id="cib-bootstrap-options-no-quorum-policy"/>
<nvpair name="stonith-enabled" value="false" id="cib-bootstrap-options-stonith-enabled"/>
</cluster_property_set>
</crm_config>
<nodes>
<node id="1002" uname="juju-34fde5-0"/>
</nodes>
<resources/>
<constraints/>
<rsc_defaults>
<meta_attributes id="rsc-options">
<nvpair name="resource-stickiness" value="100" id="rsc-options-resource-stickiness"/>
</meta_attributes>
</rsc_defaults>
</configuration>
</cib>
''' # noqa
# Same sample CIB document with an additional nvpair that sets
# maintenance-mode to "true".
CRM_CONFIGURE_SHOW_XML_MAINT_MODE_TRUE = '''<?xml version="1.0" ?>
<cib num_updates="1" dc-uuid="1002" update-origin="juju-34fde5-0" crm_feature_set="3.0.7" validate-with="pacemaker-1.2" update-client="cibadmin" epoch="1103" admin_epoch="0" cib-last-written="Fri Aug 4 13:45:06 2017" have-quorum="1">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.10-42f2063"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
<nvpair name="no-quorum-policy" value="stop" id="cib-bootstrap-options-no-quorum-policy"/>
<nvpair name="stonith-enabled" value="false" id="cib-bootstrap-options-stonith-enabled"/>
<nvpair name="maintenance-mode" value="true" id="cib-bootstrap-options-maintenance-mode"/>
</cluster_property_set>
</crm_config>
<nodes>
<node id="1002" uname="juju-34fde5-0"/>
</nodes>
<resources/>
<constraints/>
<rsc_defaults>
<meta_attributes id="rsc-options">
<nvpair name="resource-stickiness" value="100" id="rsc-options-resource-stickiness"/>
</meta_attributes>
</rsc_defaults>
</configuration>
</cib>
''' # noqa
class TestPcmk(unittest.TestCase):
@ -48,3 +103,67 @@ class TestPcmk(unittest.TestCase):
gethostname.return_value = 'hanode-1'
getstatusoutput.return_value = (0, 'Hosname: hanode-1')
self.assertTrue(pcmk.wait_for_pcmk(retries=2, sleep=0))
@mock.patch('subprocess.check_output')
def test_crm_version(self, mock_check_output):
    """crm_version() parses the xenial and trusty `crm --version` output."""
    # xenial
    mock_check_output.return_value = "crm 2.2.0\n"
    ret = pcmk.crm_version()
    self.assertEqual(StrictVersion('2.2.0'), ret)
    mock_check_output.assert_called_with(['crm', '--version'],
                                         universal_newlines=True)

    # trusty
    # reset_mock() is the real Mock API; the former mock_reset() only
    # created and called a child mock, leaving the call history untouched
    mock_check_output.reset_mock()
    mock_check_output.return_value = ("1.2.5 (Build f2f315daf6a5fd7ddea8e5"
                                      "64cd289aa04218427d)\n")
    ret = pcmk.crm_version()
    self.assertEqual(StrictVersion('1.2.5'), ret)
    mock_check_output.assert_called_with(['crm', '--version'],
                                         universal_newlines=True)
@mock.patch('subprocess.check_output')
@mock.patch.object(pcmk, 'crm_version')
def test_get_property(self, mock_crm_version, mock_check_output):
    """get_property() picks the subcommand matching the crmsh version."""
    mock_check_output.return_value = 'false\n'
    cases = (
        ('2.2.0', 'show-property'),  # xenial
        ('2.4.0', 'get-property'),
    )
    for version, subcommand in cases:
        mock_check_output.reset_mock()
        mock_crm_version.return_value = StrictVersion(version)

        self.assertEqual('false\n', pcmk.get_property('maintenance-mode'))
        mock_check_output.assert_called_with(['crm', 'configure',
                                              subcommand,
                                              'maintenance-mode'],
                                             universal_newlines=True)
@mock.patch('subprocess.check_output')
@mock.patch.object(pcmk, 'crm_version')
def test_get_property_from_xml(self, mock_crm_version, mock_check_output):
    """With crmsh < 2.2.0 the property is read from `show xml` output."""
    show_xml_cmd = ['crm', 'configure', 'show', 'xml']
    mock_crm_version.return_value = StrictVersion('1.2.5')  # trusty

    # property absent from the XML
    mock_check_output.return_value = CRM_CONFIGURE_SHOW_XML
    with self.assertRaises(pcmk.PropertyNotFound):
        pcmk.get_property('maintenance-mode')
    mock_check_output.assert_called_with(show_xml_cmd,
                                         universal_newlines=True)

    # property present in the XML
    mock_check_output.reset_mock()
    mock_check_output.return_value = CRM_CONFIGURE_SHOW_XML_MAINT_MODE_TRUE
    self.assertEqual('true', pcmk.get_property('maintenance-mode'))
    mock_check_output.assert_called_with(show_xml_cmd,
                                         universal_newlines=True)
@mock.patch('subprocess.check_output')
def test_set_property(self, mock_check_output):
    """set_property() runs `crm configure property <name>=<value>`."""
    expected_cmd = ['crm', 'configure', 'property',
                    'maintenance-mode=false']

    pcmk.set_property('maintenance-mode', 'false')

    mock_check_output.assert_called_with(expected_cmd,
                                         universal_newlines=True)

152
unit_tests/test_utils.py Normal file
View File

@ -0,0 +1,152 @@
# Copyright 2017 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import logging
import unittest
import yaml
from contextlib import contextmanager
from mock import patch, MagicMock
def load_config():
    '''
    Walk backwards from __file__ looking for config.yaml, load and return the
    'options' section.

    The search starts in the directory holding this file and moves up one
    directory at a time; it stops with an error once the top of the
    filesystem has been checked.

    :returns: content of the 'options' section of config.yaml
    :raises: Exception if no config.yaml exists in any parent directory
    '''
    # an absolute path guarantees that dirname() eventually reaches a fixed
    # point (the root) instead of looping forever on ''
    current = os.path.abspath(__file__)
    while True:
        parent = os.path.dirname(current)
        candidate = os.path.join(parent, 'config.yaml')
        if os.path.isfile(candidate):
            break
        if parent == current:
            # dirname() no longer changes the path: nothing left to search
            message = ('Could not find config.yaml in any parent directory '
                       'of %s. ' % __file__)
            logging.error(message)
            raise Exception(message)
        current = parent

    # close the file handle deterministically
    with open(candidate) as stream:
        return yaml.safe_load(stream.read())['options']
def get_default_config():
    '''
    Load default charm config from config.yaml return as a dict.
    If no default is set in config.yaml, its value is None.

    :returns: mapping of option name to its default value (or None)
    :rtype: dict
    '''
    options = load_config()
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only);
    # .get() yields None for options that declare no default
    return {name: spec.get('default') for name, spec in options.items()}
class CharmTestCase(unittest.TestCase):
    """Base test case that swaps named attributes of an object for mocks."""

    def setUp(self, obj, patches):
        """Patch every attribute listed in ``patches`` on ``obj``.

        :param obj: object whose attributes are patched
        :param patches: names of the attributes of ``obj`` to patch
        """
        super(CharmTestCase, self).setUp()
        self.originals = {}
        self.patches = patches
        self.obj = obj
        self.test_config = TestConfig()
        self.test_relation = TestRelation()
        self.patch_all()

    def patch(self, method):
        """Patch ``method`` on the target object and return the mock.

        The original attribute is remembered in ``self.originals`` and the
        patch is undone through a cleanup.
        """
        target = self.obj
        self.originals[method] = getattr(target, method)
        patcher = patch.object(target, method)
        replacement = patcher.start()
        self.addCleanup(patcher.stop)
        return replacement

    def patch_all(self):
        """Expose one mock per patched name as an attribute of the test."""
        for name in self.patches:
            setattr(self, name, self.patch(name))
class TestConfig(object):
    """In-memory stand-in for the charm configuration used by the tests."""

    def __init__(self):
        # current values start out as the defaults from config.yaml
        self.config = get_default_config()
        self.config_prev = {}

    def previous(self, k):
        """Return the previous value of ``k``, or its current value when no
        previous value was recorded."""
        if k in self.config_prev:
            return self.config_prev[k]
        return self.config[k]

    def set_previous(self, k, v):
        """Record ``v`` as the previous value of ``k``."""
        self.config_prev[k] = v

    def unset_previous(self, k):
        """Forget the previous value of ``k``, if one was recorded."""
        self.config_prev.pop(k, None)

    def get(self, attr=None):
        """Return the value of ``attr`` (None when unknown), or this object
        itself when called without an attribute."""
        if not attr:
            return self
        return self.config.get(attr)

    def get_all(self):
        """Return the dict holding all current values."""
        return self.config

    def set(self, attr, value):
        """Change the value of an existing option.

        :raises: KeyError if ``attr`` is not a known option
        """
        if attr in self.config:
            self.config[attr] = value
        else:
            raise KeyError

    def __getitem__(self, key):
        return self.get(key)
class TestRelation(object):
    """In-memory stand-in for relation data used by the tests."""

    def __init__(self, relation_data=None):
        """
        :param relation_data: initial relation data; a fresh empty dict is
                              used when omitted
        """
        # a `{}` default would be one dict shared by every instance, so
        # data stored by one test would leak into the next
        self.relation_data = {} if relation_data is None else relation_data

    def set(self, relation_data):
        """Replace the whole relation data."""
        self.relation_data = relation_data

    def get(self, attr=None, unit=None, rid=None):
        """Return the value of ``attr`` or, without ``attr``, all the data.

        ``unit`` and ``rid`` are accepted but not used.

        :returns: the stored value, None when ``attr`` is unknown
        """
        if attr is None:
            return self.relation_data
        elif attr in self.relation_data:
            return self.relation_data[attr]
        return None
@contextmanager
def patch_open():
    '''Replace the builtin open() so that both the call to open() and the
    file object handed to the caller can be inspected.

    Yields the mock for "open" and "file", respectively.'''
    open_mock = MagicMock(spec=open)
    file_mock = MagicMock(spec=file)

    @contextmanager
    def fake_open(*args, **kwargs):
        # record the call, then hand out the shared file mock
        open_mock(*args, **kwargs)
        yield file_mock

    with patch('__builtin__.open', fake_open):
        yield open_mock, file_mock