Code cleanup. No functional changes.

Edward Hope-Morley 2015-03-27 22:14:18 -07:00
parent c20b73dd54
commit 323d18cf48
11 changed files with 566 additions and 550 deletions

View File

@ -2,13 +2,21 @@
PYTHON := /usr/bin/env python
lint:
@flake8 --exclude hooks/charmhelpers hooks unit_tests
@flake8 --exclude hooks/charmhelpers hooks unit_tests tests
@charm proof
unit_test:
@echo Starting tests...
@echo Starting unit tests...
@$(PYTHON) /usr/bin/nosetests --nologcapture --with-coverage unit_tests
test:
@echo Starting Amulet tests...
# coreycb note: The -v should only be temporary until Amulet sends
# raise_status() messages to stderr:
# https://bugs.launchpad.net/amulet/+bug/1320357
@juju test -v -p AMULET_HTTP_PROXY --timeout 900 \
00-setup 10-bundles-test
bin/charm_helpers_sync.py:
@mkdir -p bin
@bzr cat lp:charm-helpers/tools/charm_helpers_sync/charm_helpers_sync.py \
@ -16,3 +24,7 @@ bin/charm_helpers_sync.py:
sync: bin/charm_helpers_sync.py
@$(PYTHON) bin/charm_helpers_sync.py -c charm-helpers.yaml
publish: lint unit_test
bzr push lp:charms/hacluster
bzr push lp:charms/trusty/hacluster

27
TODO
View File

@ -1,27 +0,0 @@
HA Cluster (pacemaker/corosync) Charm
======================================
* Peer-relations
- make sure node was added to the cluster
- make sure node has been removed from the cluster (when deleting unit)
* One thing that can be done is to:
1. ha-relation-joined puts node in standby.
2. ha-relation-joined makes HA configuration
3. on hanode-relation-joined (2 or more nodes)
- services are stopped from upstart/lsb
- nodes are put in online mode
- services are loaded by cluster
- this way it is not in HA until we have a second node.
* Needs to communicate the VIP to the top service
* TODO: Fix Disable upstart jobs
- sudo sh -c "echo 'manual' > /etc/init/SERVICE.override"
update-rc.d -f pacemaker remove
update-rc.d pacemaker start 50 1 2 3 4 5 . stop 01 0 6 .
TODO: Problem seems to be that peer-relation gets executed before the subordinate relation.
In that case, peer relation would have to put nodes in standby and then the subordinate relation
will have to put the nodes online and configure the services. Or probably not use it at all.
Hanode-relation puts node in standby.
ha-relation counts nodes in hanode-relation and, if >2, brings them online and sets up the cluster.
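The standby/online flow sketched above maps onto helpers that do exist in hooks/pcmk.py (standby() and online(), shown later in this diff). A minimal, hypothetical sketch of the intended sequence; the peer-count threshold is an assumption taken from the TODO text, not charm code:

import pcmk

def join_cluster(peer_count, node=None):
    # Assumption from the TODO above: stay passive until a second node joins.
    if peer_count < 2:
        pcmk.standby(node)
    else:
        # Enough peers: let pacemaker take over the services.
        pcmk.online(node)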

View File

@ -1,7 +1,23 @@
options:
debug:
type: boolean
default: False
description: Enable debug logging
prefer-ipv6:
type: boolean
default: False
description: |
If True enables IPv6 support. The charm will expect network interfaces
to be configured with an IPv6 address. If set to False (default) IPv4
is expected.
.
NOTE: these charms do not currently support IPv6 privacy extension. In
order for this charm to function correctly, the privacy extension must be
disabled and a non-temporary address must be configured/available on
your network interface.
corosync_mcastaddr:
default: 226.94.1.1
type: string
default: 226.94.1.1
description: |
Multicast IP address to use for exchanging messages over the network.
If multiple clusters are on the same bindnetaddr network, this value
@ -34,9 +50,9 @@ options:
type: string
default: 'False'
description: |
Enable resource fencing (aka STONITH) for every node in the cluster.
This requires MAAS credentials to be provided and each node's power
parameters to be properly configured in its inventory.
maas_url:
type: string
default:
@ -59,38 +75,22 @@ options:
type: string
default:
description: |
One or more IPs, separated by space, that will be used as a safety check
for avoiding split brain situations. Nodes in the cluster will ping these
IPs periodically. Nodes that cannot ping monitor_host will not run shared
resources (VIP, shared disk...).
monitor_interval:
type: string
default: 5s
description: |
Time period between checks of resource health. It consists of a number
and a time factor, e.g. 5s = 5 seconds. 2m = 2 minutes.
netmtu:
type: int
default: 1500
description: MTU size used by corosync for communication.
prefer-ipv6:
type: boolean
default: False
description: |
If True enables IPv6 support. The charm will expect network interfaces
to be configured with an IPv6 address. If set to False (default) IPv4
is expected.
.
NOTE: these charms do not currently support IPv6 privacy extension. In
order for this charm to function correctly, the privacy extension must be
disabled and a non-temporary address must be configured/available on
your network interface.
corosync_transport:
type: string
default: "multicast"
description: |
Two supported modes are multicast (udp) or unicast (udpu)
debug:
default: False
type: boolean
description: Enable debug logging
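For reference, the corosync_transport option accepts the legacy names above as well as the corosync-native ones; the get_transport() helper added to hooks/utils.py by this commit normalises them. A minimal standalone sketch of that mapping (the assertions are illustrative only):

_deprecated_transport_values = {"multicast": "udp", "unicast": "udpu"}

def normalise_transport(value):
    # Mirrors get_transport() from this commit: map legacy names onto the
    # values corosync understands and reject anything else.
    val = _deprecated_transport_values.get(value, value)
    if val not in ('udp', 'udpu'):
        raise ValueError("Unsupported corosync_transport type '%s'" % value)
    return val

assert normalise_transport('multicast') == 'udp'
assert normalise_transport('unicast') == 'udpu'
assert normalise_transport('udpu') == 'udpu'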

View File

@ -1,45 +1,31 @@
#!/usr/bin/python
#
# Copyright 2012 Canonical Ltd.
#
# Authors:
# Andres Rodriguez <andres.rodriguez@canonical.com>
#
import ast
import shutil
import sys
import os
from base64 import b64decode
import sys
import maas as MAAS
import pcmk
import hacluster
import socket
from charmhelpers.core.hookenv import (
log,
relation_get,
DEBUG,
INFO,
related_units,
relation_ids,
relation_set,
unit_get,
config,
Hooks, UnregisteredHookError,
local_unit,
unit_private_ip,
Hooks,
UnregisteredHookError,
)
from charmhelpers.core.host import (
service_start,
service_stop,
service_restart,
service_running,
write_file,
mkdir,
file_hash,
lsb_release
)
from charmhelpers.fetch import (
@ -49,27 +35,24 @@ from charmhelpers.fetch import (
)
from charmhelpers.contrib.hahelpers.cluster import (
peer_ips,
peer_units,
oldest_peer
)
from charmhelpers.contrib.openstack.utils import get_host_ip
from utils import (
get_corosync_conf,
assert_charm_supports_ipv6,
get_cluster_nodes,
parse_data,
configure_corosync,
configure_stonith,
configure_monitor_host,
configure_cluster_global,
)
hooks = Hooks()
COROSYNC_CONF = '/etc/corosync/corosync.conf'
COROSYNC_DEFAULT = '/etc/default/corosync'
COROSYNC_AUTHKEY = '/etc/corosync/authkey'
COROSYNC_CONF_FILES = [
COROSYNC_DEFAULT,
COROSYNC_AUTHKEY,
COROSYNC_CONF
]
PACKAGES = ['corosync', 'pacemaker', 'python-netaddr', 'ipmitool']
SUPPORTED_TRANSPORTS = ['udp', 'udpu', 'multicast', 'unicast']
@hooks.hook()
@ -84,110 +67,6 @@ def install():
if not os.path.isfile('/usr/lib/ocf/resource.d/ceph/rbd'):
shutil.copy('ocf/ceph/rbd', '/usr/lib/ocf/resource.d/ceph/rbd')
_deprecated_transport_values = {"multicast": "udp", "unicast": "udpu"}
def get_transport():
transport = config('corosync_transport')
val = _deprecated_transport_values.get(transport, transport)
if val not in ['udp', 'udpu']:
msg = ("Unsupported corosync_transport type '%s' - supported "
"types are: %s" % (transport, ', '.join(SUPPORTED_TRANSPORTS)))
raise ValueError(msg)
return val
def get_corosync_id(unit_name):
# Corosync nodeid 0 is reserved so increase all the nodeids to avoid it
off_set = 1000
return off_set + int(unit_name.split('/')[1])
def get_ha_nodes():
ha_units = peer_ips(peer_relation='hanode')
ha_units[local_unit()] = unit_private_ip()
ha_nodes = {}
for unit in ha_units:
corosync_id = get_corosync_id(unit)
ha_nodes[corosync_id] = get_host_ip(ha_units[unit])
return ha_nodes
def get_corosync_conf():
if config('prefer-ipv6'):
ip_version = 'ipv6'
bindnetaddr = hacluster.get_ipv6_network_address
else:
ip_version = 'ipv4'
bindnetaddr = hacluster.get_network_address
# NOTE(jamespage) use local charm configuration over any provided by
# principle charm
conf = {
'corosync_bindnetaddr':
bindnetaddr(config('corosync_bindiface')),
'corosync_mcastport': config('corosync_mcastport'),
'corosync_mcastaddr': config('corosync_mcastaddr'),
'ip_version': ip_version,
'ha_nodes': get_ha_nodes(),
'transport': get_transport(),
'debug': config('debug'),
}
if None not in conf.itervalues():
return conf
conf = {}
for relid in relation_ids('ha'):
for unit in related_units(relid):
bindiface = relation_get('corosync_bindiface',
unit, relid)
conf = {
'corosync_bindnetaddr': bindnetaddr(bindiface),
'corosync_mcastport': relation_get('corosync_mcastport',
unit, relid),
'corosync_mcastaddr': config('corosync_mcastaddr'),
'ip_version': ip_version,
'ha_nodes': get_ha_nodes(),
'transport': get_transport(),
'debug': config('debug'),
}
if config('prefer-ipv6'):
conf['nodeid'] = get_corosync_id(local_unit())
conf['netmtu'] = config('netmtu')
if None not in conf.itervalues():
return conf
missing = [k for k, v in conf.iteritems() if v is None]
log('Missing required configuration: %s' % missing)
return None
def emit_corosync_conf():
corosync_conf_context = get_corosync_conf()
if corosync_conf_context:
write_file(path=COROSYNC_CONF,
content=render_template('corosync.conf',
corosync_conf_context))
return True
else:
return False
def emit_base_conf():
corosync_default_context = {'corosync_enabled': 'yes'}
write_file(path=COROSYNC_DEFAULT,
content=render_template('corosync',
corosync_default_context))
corosync_key = config('corosync_key')
if corosync_key:
write_file(path=COROSYNC_AUTHKEY,
content=b64decode(corosync_key),
perms=0o400)
return True
else:
return False
@hooks.hook()
def config_changed():
@ -196,9 +75,8 @@ def config_changed():
corosync_key = config('corosync_key')
if not corosync_key:
log('CRITICAL',
'No Corosync key supplied, cannot proceed')
sys.exit(1)
raise Exception('No Corosync key supplied, cannot proceed')
hacluster.enable_lsb_services('pacemaker')
if configure_corosync():
@ -213,122 +91,31 @@ def upgrade_charm():
install()
def restart_corosync():
if service_running("pacemaker"):
service_stop("pacemaker")
service_restart("corosync")
service_start("pacemaker")
def restart_corosync_on_change():
'''Simple decorator to restart corosync if any of its config changes'''
def wrap(f):
def wrapped_f(*args):
checksums = {}
for path in COROSYNC_CONF_FILES:
checksums[path] = file_hash(path)
return_data = f(*args)
# NOTE: this assumes that this call is always done around
# configure_corosync, which returns true if configuration
# files were actually generated
if return_data:
for path in COROSYNC_CONF_FILES:
if checksums[path] != file_hash(path):
restart_corosync()
break
return return_data
return wrapped_f
return wrap
@restart_corosync_on_change()
def configure_corosync():
log('Configuring and (maybe) restarting corosync')
return emit_base_conf() and emit_corosync_conf()
def configure_monitor_host():
'''Configure extra monitor host for better network failure detection'''
log('Checking monitor host configuration')
monitor_host = config('monitor_host')
if monitor_host:
if not pcmk.crm_opt_exists('ping'):
log('Implementing monitor host'
' configuration (host: %s)' % monitor_host)
monitor_interval = config('monitor_interval')
cmd = 'crm -w -F configure primitive ping' \
' ocf:pacemaker:ping params host_list="%s"' \
' multiplier="100" op monitor interval="%s"' %\
(monitor_host, monitor_interval)
pcmk.commit(cmd)
cmd = 'crm -w -F configure clone cl_ping ping' \
' meta interleave="true"'
pcmk.commit(cmd)
else:
log('Reconfiguring monitor host'
' configuration (host: %s)' % monitor_host)
cmd = 'crm -w -F resource param ping set host_list="%s"' %\
monitor_host
else:
if pcmk.crm_opt_exists('ping'):
log('Disabling monitor host configuration')
pcmk.commit('crm -w -F resource stop ping')
pcmk.commit('crm -w -F configure delete ping')
def configure_cluster_global():
'''Configure global cluster options'''
log('Applying global cluster configuration')
if int(config('cluster_count')) >= 3:
# NOTE(jamespage) if 3 or more nodes, then quorum can be
# managed effectively, so stop if quorum lost
log('Configuring no-quorum-policy to stop')
cmd = "crm configure property no-quorum-policy=stop"
else:
# NOTE(jamespage) if less than 3 nodes, quorum not possible
# so ignore
log('Configuring no-quorum-policy to ignore')
cmd = "crm configure property no-quorum-policy=ignore"
pcmk.commit(cmd)
cmd = 'crm configure rsc_defaults $id="rsc-options"' \
' resource-stickiness="100"'
pcmk.commit(cmd)
def parse_data(relid, unit, key):
'''Simple helper to ast parse relation data'''
data = relation_get(key, unit, relid)
if data:
return ast.literal_eval(data)
else:
return {}
@hooks.hook('ha-relation-joined',
'ha-relation-changed',
'hanode-relation-joined',
'hanode-relation-changed')
def configure_principle_cluster_resources():
def ha_relation_changed():
# Check that we are related to a principle and that
# it has already provided the required corosync configuration
if not get_corosync_conf():
log('Unable to configure corosync right now, deferring configuration')
log('Unable to configure corosync right now, deferring configuration',
level=INFO)
return
if relation_ids('hanode'):
log('Ready to form cluster - informing peers', level=DEBUG)
relation_set(relation_id=relation_ids('hanode')[0], ready=True)
else:
if relation_ids('hanode'):
log('Ready to form cluster - informing peers')
relation_set(relation_id=relation_ids('hanode')[0],
ready=True)
else:
log('Ready to form cluster, but not related to peers just yet')
return
log('Ready to form cluster, but not related to peers just yet',
level=INFO)
return
# Check that there's enough nodes in order to perform the
# configuration of the HA cluster
if (len(get_cluster_nodes()) <
int(config('cluster_count'))):
log('Not enough nodes in cluster, deferring configuration')
if len(get_cluster_nodes()) < int(config('cluster_count')):
log('Not enough nodes in cluster, deferring configuration',
level=INFO)
return
relids = relation_ids('ha')
@ -337,11 +124,13 @@ def configure_principle_cluster_resources():
relid = relids[0]
units = related_units(relid)
if len(units) < 1:
log('No principle unit found, deferring configuration')
log('No principle unit found, deferring configuration',
level=INFO)
return
unit = units[0]
log('Parsing cluster configuration'
' using rid: {}, unit: {}'.format(relid, unit))
log('Parsing cluster configuration using rid: %s, unit: %s' %
(relid, unit), level=DEBUG)
resources = parse_data(relid, unit, 'resources')
delete_resources = parse_data(relid, unit, 'delete_resources')
resource_params = parse_data(relid, unit, 'resource_params')
@ -353,7 +142,7 @@ def configure_principle_cluster_resources():
locations = parse_data(relid, unit, 'locations')
init_services = parse_data(relid, unit, 'init_services')
else:
log('Related to {} ha services'.format(len(relids)))
log('Related to %s ha services' % (len(relids)), level=DEBUG)
return
if True in [ra.startswith('ocf:openstack')
@ -374,17 +163,16 @@ def configure_principle_cluster_resources():
# Only configure the cluster resources
# from the oldest peer unit.
if oldest_peer(peer_units()):
log('Deleting Resources')
log(delete_resources)
log('Deleting Resources: %s' % (delete_resources), level=DEBUG)
for res_name in delete_resources:
if pcmk.crm_opt_exists(res_name):
log('Stopping and deleting resource %s' % res_name)
log('Stopping and deleting resource %s' % res_name,
level=DEBUG)
if pcmk.crm_res_running(res_name):
pcmk.commit('crm -w -F resource stop %s' % res_name)
pcmk.commit('crm -w -F configure delete %s' % res_name)
log('Configuring Resources')
log(resources)
log('Configuring Resources: %s' % (resources), level=DEBUG)
for res_name, res_type in resources.iteritems():
# disable the service we are going to put in HA
if res_type.split(':')[0] == "lsb":
@ -404,69 +192,62 @@ def configure_principle_cluster_resources():
cmd = 'crm -w -F configure primitive %s %s' % (res_name,
res_type)
else:
cmd = 'crm -w -F configure primitive %s %s %s' % \
(res_name,
res_type,
resource_params[res_name])
cmd = ('crm -w -F configure primitive %s %s %s' %
(res_name, res_type, resource_params[res_name]))
pcmk.commit(cmd)
log('%s' % cmd)
log('%s' % cmd, level=DEBUG)
if config('monitor_host'):
cmd = 'crm -F configure location Ping-%s %s rule' \
' -inf: pingd lte 0' % (res_name, res_name)
cmd = ('crm -F configure location Ping-%s %s rule '
'-inf: pingd lte 0' % (res_name, res_name))
pcmk.commit(cmd)
log('Configuring Groups')
log(groups)
log('Configuring Groups: %s' % (groups), level=DEBUG)
for grp_name, grp_params in groups.iteritems():
if not pcmk.crm_opt_exists(grp_name):
cmd = 'crm -w -F configure group %s %s' % (grp_name,
grp_params)
cmd = ('crm -w -F configure group %s %s' %
(grp_name, grp_params))
pcmk.commit(cmd)
log('%s' % cmd)
log('%s' % cmd, level=DEBUG)
log('Configuring Master/Slave (ms)')
log(ms)
log('Configuring Master/Slave (ms): %s' % (ms), level=DEBUG)
for ms_name, ms_params in ms.iteritems():
if not pcmk.crm_opt_exists(ms_name):
cmd = 'crm -w -F configure ms %s %s' % (ms_name, ms_params)
pcmk.commit(cmd)
log('%s' % cmd)
log('%s' % cmd, level=DEBUG)
log('Configuring Orders')
log(orders)
log('Configuring Orders: %s' % (orders), level=DEBUG)
for ord_name, ord_params in orders.iteritems():
if not pcmk.crm_opt_exists(ord_name):
cmd = 'crm -w -F configure order %s %s' % (ord_name,
ord_params)
pcmk.commit(cmd)
log('%s' % cmd)
log('%s' % cmd, level=DEBUG)
log('Configuring Colocations')
log(colocations)
log('Configuring Colocations: %s' % colocations, level=DEBUG)
for col_name, col_params in colocations.iteritems():
if not pcmk.crm_opt_exists(col_name):
cmd = 'crm -w -F configure colocation %s %s' % (col_name,
col_params)
pcmk.commit(cmd)
log('%s' % cmd)
log('%s' % cmd, level=DEBUG)
log('Configuring Clones')
log(clones)
log('Configuring Clones: %s' % clones, level=DEBUG)
for cln_name, cln_params in clones.iteritems():
if not pcmk.crm_opt_exists(cln_name):
cmd = 'crm -w -F configure clone %s %s' % (cln_name,
cln_params)
pcmk.commit(cmd)
log('%s' % cmd)
log('%s' % cmd, level=DEBUG)
log('Configuring Locations')
log(locations)
log('Configuring Locations: %s' % locations, level=DEBUG)
for loc_name, loc_params in locations.iteritems():
if not pcmk.crm_opt_exists(loc_name):
cmd = 'crm -w -F configure location %s %s' % (loc_name,
loc_params)
pcmk.commit(cmd)
log('%s' % cmd)
log('%s' % cmd, level=DEBUG)
for res_name, res_type in resources.iteritems():
if len(init_services) != 0 and res_name in init_services:
@ -494,88 +275,7 @@ def configure_principle_cluster_resources():
pcmk.commit(cmd)
for rel_id in relation_ids('ha'):
relation_set(relation_id=rel_id,
clustered="yes")
def configure_stonith():
if config('stonith_enabled') not in ['true', 'True', True]:
log('Disabling STONITH')
cmd = "crm configure property stonith-enabled=false"
pcmk.commit(cmd)
else:
log('Enabling STONITH for all nodes in cluster.')
# configure stonith resources for all nodes in cluster.
# note: this is totally provider dependent and requires
# access to the MAAS API endpoint, using endpoint and credentials
# set in config.
url = config('maas_url')
creds = config('maas_credentials')
if None in [url, creds]:
log('maas_url and maas_credentials must be set'
' in config to enable STONITH.')
sys.exit(1)
maas = MAAS.MAASHelper(url, creds)
nodes = maas.list_nodes()
if not nodes:
log('Could not obtain node inventory from '
'MAAS @ %s.' % url)
sys.exit(1)
cluster_nodes = pcmk.list_nodes()
for node in cluster_nodes:
rsc, constraint = pcmk.maas_stonith_primitive(nodes, node)
if not rsc:
log('Failed to determine STONITH primitive for node'
' %s' % node)
sys.exit(1)
rsc_name = str(rsc).split(' ')[1]
if not pcmk.is_resource_present(rsc_name):
log('Creating new STONITH primitive %s.' %
rsc_name)
cmd = 'crm -F configure %s' % rsc
pcmk.commit(cmd)
if constraint:
cmd = 'crm -F configure %s' % constraint
pcmk.commit(cmd)
else:
log('STONITH primitive already exists '
'for node.')
cmd = "crm configure property stonith-enabled=true"
pcmk.commit(cmd)
def get_cluster_nodes():
hosts = []
hosts.append(unit_get('private-address'))
for relid in relation_ids('hanode'):
for unit in related_units(relid):
if relation_get('ready',
rid=relid,
unit=unit):
hosts.append(relation_get('private-address',
unit, relid))
hosts.sort()
return hosts
TEMPLATES_DIR = 'templates'
try:
import jinja2
except ImportError:
apt_install('python-jinja2', fatal=True)
import jinja2
def render_template(template_name, context, template_dir=TEMPLATES_DIR):
templates = jinja2.Environment(
loader=jinja2.FileSystemLoader(template_dir)
)
template = templates.get_template(template_name)
return template.render(context)
relation_set(relation_id=rel_id, clustered="yes")
@hooks.hook()
@ -585,15 +285,8 @@ def stop():
apt_purge(['corosync', 'pacemaker'], fatal=True)
def assert_charm_supports_ipv6():
"""Check whether we are able to support charms ipv6."""
if lsb_release()['DISTRIB_CODENAME'].lower() < "trusty":
raise Exception("IPv6 is not supported in the charms for Ubuntu "
"versions less than Trusty 14.04")
if __name__ == '__main__':
try:
hooks.execute(sys.argv)
except UnregisteredHookError as e:
log('Unknown hook {} - skipping.'.format(e))
log('Unknown hook {} - skipping.'.format(e), level=DEBUG)
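The restart_corosync_on_change decorator removed from this file (and re-added in hooks/utils.py below) follows a hash-before/hash-after pattern that is easy to reuse. A self-contained sketch of the same technique; the watched path and the restart callable are stand-in assumptions, not charm code:

import hashlib
import os

WATCHED_FILES = ['/etc/corosync/corosync.conf']  # stand-in for COROSYNC_CONF_FILES

def file_md5(path):
    # Hash a file's contents, or return None if it does not exist yet.
    if not os.path.exists(path):
        return None
    with open(path, 'rb') as fd:
        return hashlib.md5(fd.read()).hexdigest()

def restart_on_change(restart):
    """Call restart() if any watched file changed while the wrapped f ran."""
    def wrap(f):
        def wrapped_f(*args, **kwargs):
            before = dict((p, file_md5(p)) for p in WATCHED_FILES)
            result = f(*args, **kwargs)
            if result and any(before[p] != file_md5(p) for p in WATCHED_FILES):
                restart()
            return result
        return wrapped_f
    return wrap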

View File

@ -4,7 +4,10 @@ import json
import subprocess
from charmhelpers.fetch import apt_install
from charmhelpers.core.hookenv import log, ERROR
from charmhelpers.core.hookenv import (
log,
ERROR,
)
MAAS_STABLE_PPA = 'ppa:maas-maintainers/stable '
MAAS_PROFILE_NAME = 'maas-juju-hacluster'
@ -18,10 +21,10 @@ class MAASHelper(object):
self.install_maas_cli()
def install_maas_cli(self):
'''
Ensure maas-cli is installed. Fallback to MAAS stable PPA when
needed.
'''
"""Ensure maas-cli is installed
Fallback to MAAS stable PPA when needed.
"""
apt.init()
cache = apt.Cache()
@ -59,5 +62,6 @@ class MAASHelper(object):
except subprocess.CalledProcessError:
log('Could not get node inventory from MAAS.', ERROR)
return False
self.logout()
return json.loads(out)

View File

@ -2,7 +2,10 @@ import commands
import subprocess
import socket
from charmhelpers.core.hookenv import log, ERROR
from charmhelpers.core.hookenv import (
log,
ERROR
)
def wait_for_pcmk():
@ -21,6 +24,7 @@ def is_resource_present(resource):
status = commands.getstatusoutput("crm resource status %s" % resource)[0]
if status != 0:
return False
return True
@ -29,6 +33,7 @@ def standby(node=None):
cmd = "crm -F node standby"
else:
cmd = "crm -F node standby %s" % node
commit(cmd)
@ -37,6 +42,7 @@ def online(node=None):
cmd = "crm -F node online"
else:
cmd = "crm -F node online %s" % node
commit(cmd)
@ -44,6 +50,7 @@ def crm_opt_exists(opt_name):
output = commands.getstatusoutput("crm configure show")[1]
if opt_name in output:
return True
return False
@ -51,8 +58,8 @@ def crm_res_running(opt_name):
(c, output) = commands.getstatusoutput("crm resource status %s" % opt_name)
if output.startswith("resource %s is running" % opt_name):
return True
else:
return False
return False
def list_nodes():
@ -62,20 +69,21 @@ def list_nodes():
for line in str(out).split('\n'):
if line != '':
nodes.append(line.split(':')[0])
return nodes
def _maas_ipmi_stonith_resource(node, power_params):
rsc_name = 'res_stonith_%s' % node
rsc = 'primitive %s stonith:external/ipmi' % rsc_name
rsc += ' params hostname=%s ipaddr=%s userid=%s passwd=%s interface=lan' %\
(node, power_params['power_address'],
power_params['power_user'], power_params['power_pass'])
rsc = ('primitive %s stonith:external/ipmi params hostname=%s ipaddr=%s '
'userid=%s passwd=%s interface=lan' %
(rsc_name, node, power_params['power_address'],
power_params['power_user'], power_params['power_pass']))
# ensure ipmi stonith agents are not running on the nodes that
# they manage.
constraint = 'location const_loc_stonith_avoid_%s %s -inf: %s' %\
(node, rsc_name, node)
constraint = ('location const_loc_stonith_avoid_%s %s -inf: %s' %
(node, rsc_name, node))
return rsc, constraint
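To make the reformatted strings above concrete, here is a hypothetical rendering of the primitive and constraint that _maas_ipmi_stonith_resource() builds; the node name and power parameters below are invented for illustration:

def ipmi_stonith_strings(node, power_params):
    # Same construction as _maas_ipmi_stonith_resource() above.
    rsc_name = 'res_stonith_%s' % node
    rsc = ('primitive %s stonith:external/ipmi params hostname=%s ipaddr=%s '
           'userid=%s passwd=%s interface=lan' %
           (rsc_name, node, power_params['power_address'],
            power_params['power_user'], power_params['power_pass']))
    constraint = ('location const_loc_stonith_avoid_%s %s -inf: %s' %
                  (node, rsc_name, node))
    return rsc, constraint

print(ipmi_stonith_strings('node1', {'power_address': '10.0.0.1',
                                     'power_user': 'maas',
                                     'power_pass': 'secret'}))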

331
hooks/utils.py Normal file
View File

@ -0,0 +1,331 @@
#!/usr/bin/python
import ast
import pcmk
import maas
import hacluster
from base64 import b64decode
from charmhelpers.core.hookenv import (
local_unit,
log,
DEBUG,
INFO,
relation_get,
related_units,
relation_ids,
config,
unit_private_ip,
unit_get,
)
from charmhelpers.contrib.openstack.utils import get_host_ip
from charmhelpers.core.host import (
service_start,
service_stop,
service_restart,
service_running,
write_file,
file_hash,
lsb_release
)
from charmhelpers.fetch import (
apt_install,
)
from charmhelpers.contrib.hahelpers.cluster import (
peer_ips,
)
try:
import jinja2
except ImportError:
apt_install('python-jinja2', fatal=True)
import jinja2
TEMPLATES_DIR = 'templates'
COROSYNC_CONF = '/etc/corosync/corosync.conf'
COROSYNC_DEFAULT = '/etc/default/corosync'
COROSYNC_AUTHKEY = '/etc/corosync/authkey'
COROSYNC_CONF_FILES = [
COROSYNC_DEFAULT,
COROSYNC_AUTHKEY,
COROSYNC_CONF
]
SUPPORTED_TRANSPORTS = ['udp', 'udpu', 'multicast', 'unicast']
def get_corosync_id(unit_name):
# Corosync nodeid 0 is reserved so increase all the nodeids to avoid it
off_set = 1000
return off_set + int(unit_name.split('/')[1])
def get_ha_nodes():
ha_units = peer_ips(peer_relation='hanode')
ha_units[local_unit()] = unit_private_ip()
ha_nodes = {}
for unit in ha_units:
corosync_id = get_corosync_id(unit)
ha_nodes[corosync_id] = get_host_ip(ha_units[unit])
return ha_nodes
def get_corosync_conf():
if config('prefer-ipv6'):
ip_version = 'ipv6'
bindnetaddr = hacluster.get_ipv6_network_address
else:
ip_version = 'ipv4'
bindnetaddr = hacluster.get_network_address
# NOTE(jamespage) use local charm configuration over any provided by
# principle charm
conf = {
'corosync_bindnetaddr':
bindnetaddr(config('corosync_bindiface')),
'corosync_mcastport': config('corosync_mcastport'),
'corosync_mcastaddr': config('corosync_mcastaddr'),
'ip_version': ip_version,
'ha_nodes': get_ha_nodes(),
'transport': get_transport(),
'debug': config('debug'),
}
if None not in conf.itervalues():
return conf
conf = {}
for relid in relation_ids('ha'):
for unit in related_units(relid):
bindiface = relation_get('corosync_bindiface',
unit, relid)
conf = {
'corosync_bindnetaddr': bindnetaddr(bindiface),
'corosync_mcastport': relation_get('corosync_mcastport',
unit, relid),
'corosync_mcastaddr': config('corosync_mcastaddr'),
'ip_version': ip_version,
'ha_nodes': get_ha_nodes(),
'transport': get_transport(),
'debug': config('debug'),
}
if config('prefer-ipv6'):
conf['nodeid'] = get_corosync_id(local_unit())
conf['netmtu'] = config('netmtu')
if None not in conf.itervalues():
return conf
missing = [k for k, v in conf.iteritems() if v is None]
log('Missing required configuration: %s' % missing)
return None
def emit_corosync_conf():
corosync_conf_context = get_corosync_conf()
if corosync_conf_context:
write_file(path=COROSYNC_CONF,
content=render_template('corosync.conf',
corosync_conf_context))
return True
return False
def emit_base_conf():
corosync_default_context = {'corosync_enabled': 'yes'}
write_file(path=COROSYNC_DEFAULT,
content=render_template('corosync',
corosync_default_context))
corosync_key = config('corosync_key')
if corosync_key:
write_file(path=COROSYNC_AUTHKEY,
content=b64decode(corosync_key),
perms=0o400)
return True
return False
def render_template(template_name, context, template_dir=TEMPLATES_DIR):
templates = jinja2.Environment(
loader=jinja2.FileSystemLoader(template_dir)
)
template = templates.get_template(template_name)
return template.render(context)
def assert_charm_supports_ipv6():
"""Check whether we are able to support charms ipv6."""
if lsb_release()['DISTRIB_CODENAME'].lower() < "trusty":
raise Exception("IPv6 is not supported in the charms for Ubuntu "
"versions less than Trusty 14.04")
def get_transport():
transport = config('corosync_transport')
_deprecated_transport_values = {"multicast": "udp", "unicast": "udpu"}
val = _deprecated_transport_values.get(transport, transport)
if val not in ['udp', 'udpu']:
msg = ("Unsupported corosync_transport type '%s' - supported "
"types are: %s" % (transport, ', '.join(SUPPORTED_TRANSPORTS)))
raise ValueError(msg)
return val
def get_cluster_nodes():
hosts = []
hosts.append(unit_get('private-address'))
for relid in relation_ids('hanode'):
for unit in related_units(relid):
if relation_get('ready', rid=relid, unit=unit):
hosts.append(relation_get('private-address', unit, relid))
hosts.sort()
return hosts
def parse_data(relid, unit, key):
"""Simple helper to ast parse relation data"""
data = relation_get(key, unit, relid)
if data:
return ast.literal_eval(data)
return {}
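# Illustration only (not part of this module): relation data arrives as a
# Python-literal string and ast.literal_eval turns it back into native types.
# The example value below is hypothetical:
#     ast.literal_eval("{'res_ks_vip': 'ocf:heartbeat:IPaddr2'}")
#     => {'res_ks_vip': 'ocf:heartbeat:IPaddr2'}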
def configure_stonith():
if config('stonith_enabled') not in ['true', 'True', True]:
log('Disabling STONITH', level=INFO)
cmd = "crm configure property stonith-enabled=false"
pcmk.commit(cmd)
else:
log('Enabling STONITH for all nodes in cluster.', level=INFO)
# configure stonith resources for all nodes in cluster.
# note: this is totally provider dependent and requires
# access to the MAAS API endpoint, using endpoint and credentials
# set in config.
url = config('maas_url')
creds = config('maas_credentials')
if None in [url, creds]:
raise Exception('maas_url and maas_credentials must be set '
'in config to enable STONITH.')
nodes = maas.MAASHelper(url, creds).list_nodes()
if not nodes:
raise Exception('Could not obtain node inventory from '
'MAAS @ %s.' % url)
cluster_nodes = pcmk.list_nodes()
for node in cluster_nodes:
rsc, constraint = pcmk.maas_stonith_primitive(nodes, node)
if not rsc:
raise Exception('Failed to determine STONITH primitive for '
'node %s' % node)
rsc_name = str(rsc).split(' ')[1]
if not pcmk.is_resource_present(rsc_name):
log('Creating new STONITH primitive %s.' % rsc_name,
level=DEBUG)
cmd = 'crm -F configure %s' % rsc
pcmk.commit(cmd)
if constraint:
cmd = 'crm -F configure %s' % constraint
pcmk.commit(cmd)
else:
log('STONITH primitive already exists for node.', level=DEBUG)
pcmk.commit("crm configure property stonith-enabled=true")
def configure_monitor_host():
"""Configure extra monitor host for better network failure detection"""
log('Checking monitor host configuration', level=DEBUG)
monitor_host = config('monitor_host')
if monitor_host:
if not pcmk.crm_opt_exists('ping'):
log('Implementing monitor host configuration (host: %s)' %
monitor_host, level=DEBUG)
monitor_interval = config('monitor_interval')
cmd = ('crm -w -F configure primitive ping '
'ocf:pacemaker:ping params host_list="%s" '
'multiplier="100" op monitor interval="%s" ' %
(monitor_host, monitor_interval))
pcmk.commit(cmd)
cmd = ('crm -w -F configure clone cl_ping ping '
'meta interleave="true"')
pcmk.commit(cmd)
else:
log('Reconfiguring monitor host configuration (host: %s)' %
monitor_host, level=DEBUG)
cmd = ('crm -w -F resource param ping set host_list="%s"' %
monitor_host)
else:
if pcmk.crm_opt_exists('ping'):
log('Disabling monitor host configuration', level=DEBUG)
pcmk.commit('crm -w -F resource stop ping')
pcmk.commit('crm -w -F configure delete ping')
def configure_cluster_global():
"""Configure global cluster options"""
log('Applying global cluster configuration', level=DEBUG)
if int(config('cluster_count')) >= 3:
# NOTE(jamespage) if 3 or more nodes, then quorum can be
# managed effectively, so stop if quorum lost
log('Configuring no-quorum-policy to stop', level=DEBUG)
cmd = "crm configure property no-quorum-policy=stop"
else:
# NOTE(jamespage) if less than 3 nodes, quorum not possible
# so ignore
log('Configuring no-quorum-policy to ignore', level=DEBUG)
cmd = "crm configure property no-quorum-policy=ignore"
pcmk.commit(cmd)
cmd = ('crm configure rsc_defaults $id="rsc-options" '
'resource-stickiness="100"')
pcmk.commit(cmd)
def restart_corosync_on_change():
"""Simple decorator to restart corosync if any of its config changes"""
def wrap(f):
def wrapped_f(*args, **kwargs):
checksums = {}
for path in COROSYNC_CONF_FILES:
checksums[path] = file_hash(path)
return_data = f(*args, **kwargs)
# NOTE: this assumes that this call is always done around
# configure_corosync, which returns true if configuration
# files were actually generated
if return_data:
for path in COROSYNC_CONF_FILES:
if checksums[path] != file_hash(path):
restart_corosync()
break
return return_data
return wrapped_f
return wrap
@restart_corosync_on_change()
def configure_corosync():
log('Configuring and (maybe) restarting corosync', level=DEBUG)
return emit_base_conf() and emit_corosync_conf()
def restart_corosync():
if service_running("pacemaker"):
service_stop("pacemaker")
service_restart("corosync")
service_start("pacemaker")
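As a small, self-contained check of the nodeid scheme used by get_corosync_id() above: corosync reserves nodeid 0, so unit numbers are offset by 1000. The unit names here are illustrative:

def corosync_id(unit_name, off_set=1000):
    # Mirrors get_corosync_id(): 'service/N' -> 1000 + N.
    return off_set + int(unit_name.split('/')[1])

assert corosync_id('hacluster/0') == 1000
assert corosync_id('hacluster/3') == 1003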

View File

@ -1 +0,0 @@
68

View File

@ -30,4 +30,4 @@ class BundleTest(unittest.TestCase):
if __name__ == '__main__':
unittest.main()
unittest.main()

View File

@ -1,123 +1,42 @@
from __future__ import print_function
import mock
import os
import re
import shutil
import tempfile
import unittest
with mock.patch('charmhelpers.core.hookenv.config'):
import hooks as hacluster_hooks
import hooks
def local_log(msg, level='INFO'):
print('[{}] {}'.format(level, msg))
def write_file(path, content, *args, **kwargs):
with open(path, 'w') as f:
f.write(content)
f.flush()
class SwiftContextTestCase(unittest.TestCase):
@mock.patch('hooks.config')
def test_get_transport(self, mock_config):
mock_config.return_value = 'udp'
self.assertEqual('udp', hacluster_hooks.get_transport())
mock_config.return_value = 'udpu'
self.assertEqual('udpu', hacluster_hooks.get_transport())
mock_config.return_value = 'hafu'
self.assertRaises(ValueError, hacluster_hooks.get_transport)
@mock.patch('hooks.log', local_log)
@mock.patch('hooks.write_file', write_file)
@mock.patch.object(hooks, 'log', lambda *args, **kwargs: None)
@mock.patch('utils.COROSYNC_CONF', os.path.join(tempfile.mkdtemp(),
'corosync.conf'))
class TestCorosyncConf(unittest.TestCase):
def setUp(self):
self.tmpdir = tempfile.mkdtemp()
hacluster_hooks.COROSYNC_CONF = os.path.join(self.tmpdir,
'corosync.conf')
def tearDown(self):
shutil.rmtree(self.tmpdir)
def test_debug_on(self):
self.check_debug(True)
def test_debug_off(self):
self.check_debug(False)
@mock.patch('hooks.relation_get')
@mock.patch('hooks.related_units')
@mock.patch('hooks.relation_ids')
@mock.patch('hacluster.get_network_address')
@mock.patch('hooks.config')
def check_debug(self, enabled, mock_config, get_network_address,
relation_ids, related_units, relation_get):
cfg = {'debug': enabled,
'prefer-ipv6': False,
'corosync_transport': 'udpu',
'corosync_mcastaddr': 'corosync_mcastaddr'}
def c(k):
return cfg.get(k)
mock_config.side_effect = c
get_network_address.return_value = "127.0.0.1"
relation_ids.return_value = ['foo:1']
related_units.return_value = ['unit-machine-0']
relation_get.return_value = 'iface'
hacluster_hooks.get_ha_nodes = mock.MagicMock()
conf = hacluster_hooks.get_corosync_conf()
self.assertEqual(conf['debug'], enabled)
self.assertTrue(hacluster_hooks.emit_corosync_conf())
with open(hacluster_hooks.COROSYNC_CONF) as fd:
content = fd.read()
if enabled:
pattern = 'debug: on\n'
else:
pattern = 'debug: off\n'
matches = re.findall(pattern, content, re.M)
self.assertEqual(len(matches), 2, str(matches))
@mock.patch('pcmk.wait_for_pcmk')
@mock.patch('hooks.peer_units')
@mock.patch.object(hooks, 'peer_units')
@mock.patch('pcmk.crm_opt_exists')
@mock.patch('hooks.oldest_peer')
@mock.patch('hooks.configure_corosync')
@mock.patch('hooks.configure_cluster_global')
@mock.patch('hooks.configure_monitor_host')
@mock.patch('hooks.configure_stonith')
@mock.patch('hooks.related_units')
@mock.patch('hooks.get_cluster_nodes')
@mock.patch('hooks.relation_set')
@mock.patch('hooks.relation_ids')
@mock.patch('hooks.get_corosync_conf')
@mock.patch.object(hooks, 'oldest_peer')
@mock.patch.object(hooks, 'configure_corosync')
@mock.patch.object(hooks, 'configure_cluster_global')
@mock.patch.object(hooks, 'configure_monitor_host')
@mock.patch.object(hooks, 'configure_stonith')
@mock.patch.object(hooks, 'related_units')
@mock.patch.object(hooks, 'get_cluster_nodes')
@mock.patch.object(hooks, 'relation_set')
@mock.patch.object(hooks, 'relation_ids')
@mock.patch.object(hooks, 'get_corosync_conf')
@mock.patch('pcmk.commit')
@mock.patch('hooks.config')
@mock.patch('hooks.parse_data')
def test_configure_principle_cluster_resources(self, parse_data, config,
commit,
get_corosync_conf,
relation_ids, relation_set,
get_cluster_nodes,
related_units,
configure_stonith,
configure_monitor_host,
configure_cluster_global,
configure_corosync,
oldest_peer, crm_opt_exists,
peer_units, wait_for_pcmk):
@mock.patch.object(hooks, 'config')
@mock.patch.object(hooks, 'parse_data')
def test_ha_relation_changed(self, parse_data, config, commit,
get_corosync_conf, relation_ids, relation_set,
get_cluster_nodes, related_units,
configure_stonith, configure_monitor_host,
configure_cluster_global, configure_corosync,
oldest_peer, crm_opt_exists, peer_units,
wait_for_pcmk):
crm_opt_exists.return_value = False
oldest_peer.return_value = True
related_units.return_value = ['ha/0', 'ha/1', 'ha/2']
@ -130,10 +49,7 @@ class TestCorosyncConf(unittest.TestCase):
'corosync_mcastaddr': 'corosync_mcastaddr',
'cluster_count': 3}
def c(k):
return cfg.get(k)
config.side_effect = c
config.side_effect = lambda key: cfg.get(key)
rel_get_data = {'locations': {'loc_foo': 'bar rule inf: meh eq 1'},
'clones': {'cl_foo': 'res_foo meta interleave=true'},
@ -150,7 +66,7 @@ class TestCorosyncConf(unittest.TestCase):
parse_data.side_effect = fake_parse_data
hacluster_hooks.configure_principle_cluster_resources()
hooks.ha_relation_changed()
relation_set.assert_any_call(relation_id='hanode:1', ready=True)
configure_stonith.assert_called_with()
configure_monitor_host.assert_called_with()

View File

@ -0,0 +1,80 @@
import mock
import os
import re
import shutil
import tempfile
import unittest
import utils
def write_file(path, content, *args, **kwargs):
with open(path, 'w') as f:
f.write(content)
f.flush()
@mock.patch.object(utils, 'log', lambda *args, **kwargs: None)
@mock.patch.object(utils, 'write_file', write_file)
class UtilsTestCase(unittest.TestCase):
def setUp(self):
self.tmpdir = tempfile.mkdtemp()
utils.COROSYNC_CONF = os.path.join(self.tmpdir, 'corosync.conf')
def tearDown(self):
shutil.rmtree(self.tmpdir)
@mock.patch.object(utils, 'relation_get')
@mock.patch.object(utils, 'related_units')
@mock.patch.object(utils, 'relation_ids')
@mock.patch('hacluster.get_network_address')
@mock.patch.object(utils, 'config')
def check_debug(self, enabled, mock_config, get_network_address,
relation_ids, related_units, relation_get):
cfg = {'debug': enabled,
'prefer-ipv6': False,
'corosync_transport': 'udpu',
'corosync_mcastaddr': 'corosync_mcastaddr'}
def c(k):
return cfg.get(k)
mock_config.side_effect = c
get_network_address.return_value = "127.0.0.1"
relation_ids.return_value = ['foo:1']
related_units.return_value = ['unit-machine-0']
relation_get.return_value = 'iface'
utils.get_ha_nodes = mock.MagicMock()
conf = utils.get_corosync_conf()
self.assertEqual(conf['debug'], enabled)
self.assertTrue(utils.emit_corosync_conf())
with open(utils.COROSYNC_CONF) as fd:
content = fd.read()
if enabled:
pattern = 'debug: on\n'
else:
pattern = 'debug: off\n'
matches = re.findall(pattern, content, re.M)
self.assertEqual(len(matches), 2, str(matches))
def test_debug_on(self):
self.check_debug(True)
def test_debug_off(self):
self.check_debug(False)
@mock.patch.object(utils, 'config')
def test_get_transport(self, mock_config):
mock_config.return_value = 'udp'
self.assertEqual('udp', utils.get_transport())
mock_config.return_value = 'udpu'
self.assertEqual('udpu', utils.get_transport())
mock_config.return_value = 'hafu'
self.assertRaises(ValueError, utils.get_transport)