# basic deployment test class for percona-xtradb-cluster
import amulet
import re
import os
import socket
import time
import telnetlib
import yaml
from charmhelpers.contrib.openstack.amulet.deployment import (
    OpenStackAmuletDeployment
)
from charmhelpers.contrib.amulet.utils import AmuletUtils

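# Root password applied through the charm's 'root-password' config option in
# _get_configs() and used for direct mysql access in test_change_root_password.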
PXC_ROOT_PASSWD = 'ubuntu'


class BasicDeployment(OpenStackAmuletDeployment):

    utils = AmuletUtils()

    def __init__(self, vip=None, units=1, series="trusty", openstack=None,
                 source=None, stable=False):
        super(BasicDeployment, self).__init__(series, openstack, source,
                                              stable)
        self.units = units
        self.master_unit = None
        self.vip = None
        self.ha = False
        if units > 1:
            self.ha = True
            if vip:
                self.vip = vip
            elif 'AMULET_OS_VIP' in os.environ:
                self.vip = os.environ.get('AMULET_OS_VIP')
            elif os.path.isfile('local.yaml'):
                with open('local.yaml', 'rb') as f:
                    self.cfg = yaml.safe_load(f.read())
                self.vip = self.cfg.get('vip')
            else:
                amulet.raise_status(amulet.SKIP,
                                    ("Please set the vip in local.yaml or "
                                     "env var AMULET_OS_VIP to run this test "
                                     "suite"))

        self.log = self.utils.get_logger()

    def _add_services(self):
        """Add services

        Add the services that we're testing, where percona-cluster is local,
        and the rest of the services are from lp branches that are
        compatible with the local charm (e.g. stable or next).
        """
        this_service = {'name': 'percona-cluster',
                        'units': self.units}
        other_services = []
        if self.units > 1 and self.ha:
            other_services.append({'name': 'hacluster'})

        super(BasicDeployment, self)._add_services(this_service,
                                                   other_services)

    def _add_relations(self):
        """Add all of the relations for the services."""
        if self.units > 1 and self.ha:
            relations = {'percona-cluster:ha': 'hacluster:ha'}
            super(BasicDeployment, self)._add_relations(relations)

    def _get_configs(self):
        """Return the configuration dicts for all of the services."""
        cfg_percona = {'min-cluster-size': self.units,
                       'vip': self.vip,
                       'root-password': PXC_ROOT_PASSWD}

        cfg_ha = {'debug': True,
                  'corosync_key': ('xZP7GDWV0e8Qs0GxWThXirNNYlScgi3sRTdZk/IXKD'
                                   'qkNFcwdCWfRQnqrHU/6mb6sz6OIoZzX2MtfMQIDcXu'
                                   'PqQyvKuv7YbRyGHmQwAWDUA4ed759VWAO39kHkfWp9'
                                   'y5RRk/wcHakTcWYMwm70upDGJEP00YT3xem3NQy27A'
                                   'C1w=')}

        configs = {}
        if self.units > 1 and self.ha:
            cfg_ha['cluster_count'] = str(self.units)
            configs['hacluster'] = cfg_ha

        configs['percona-cluster'] = cfg_percona
        return configs

    def _configure_services(self):
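        """Apply the configuration returned by _get_configs() to the
        deployed services.
        """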
        super(BasicDeployment, self)._configure_services(self._get_configs())

    def run(self):
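        """Deploy percona-cluster (and hacluster when HA), wait for the
        deployment to settle, then run the full test suite.
        """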
        self._add_services()
        self._add_relations()
        self._configure_services()
        self._deploy()

        self.d.sentry.wait()
        self.test_deployment()

    def test_deployment(self):
        '''Top level test function executor'''
        self.test_pacemaker()
        self.test_pxc_running()
        self.test_bootstrapped_and_clustered()
        self.test_bootstrap_uuid_set_in_the_relation()
        self.test_restart_on_config_change()
        self.test_pause_resume()

        if self.ha:
            self.test_kill_master()

    def test_pacemaker(self):
        '''
        Ensure that pacemaker and corosync are correctly configured in
        clustered deployments.

        side effect: self.master_unit should be set after execution
        '''
        if self.units > 1 and self.ha:
            i = 0
            while i < 30 and not self.master_unit:
                self.master_unit = self.find_master(ha=self.ha)
                i += 1
                time.sleep(10)

            msg = 'percona-cluster vip not found'
            assert self.master_unit is not None, msg

            _, code = self.master_unit.run('sudo crm_verify --live-check')
            assert code == 0, "'crm_verify --live-check' failed"

            resources = ['res_mysql_vip']
            resources += ['res_mysql_monitor:%d' %
                          m for m in range(self.units)]
            assert sorted(self.get_pcmkr_resources()) == sorted(resources)
        else:
            self.master_unit = self.find_master(ha=self.ha)

    def test_pxc_running(self):
        '''
        Ensure PXC is running on all units
        '''
        for unit in self.d.sentry['percona-cluster']:
            assert self.is_mysqld_running(unit), 'mysql not running: %s' % unit

    def test_bootstrapped_and_clustered(self):
        '''
        Ensure PXC is bootstrapped and that peer units are clustered
        '''
        self.log.info('Ensuring PXC is bootstrapped')
        msg = "Percona cluster failed to bootstrap"
        assert self.is_pxc_bootstrapped(), msg

        self.log.info('Checking PXC cluster size == {}'.format(self.units))
        got = int(self.get_cluster_size())
        msg = ("Percona cluster unexpected size"
               " (wanted=%s, got=%s)" % (self.units, got))
        assert got == self.units, msg

    def test_bootstrap_uuid_set_in_the_relation(self):
        """Verify that the bootstrap-uuid attribute was set by the leader and
        all the peers were notified.
        """
        (leader_uuid, code) = self.master_unit.run("leader-get bootstrap-uuid")
        assert leader_uuid

        cmd_rel_get = ("relation-get -r `relation-ids cluster` "
                       "bootstrap-uuid %s")
        units = self.d.sentry['percona-cluster']
        for unit in units:
            for peer in units:
                cmd = cmd_rel_get % peer.info['unit_name']
                self.log.debug(cmd)
                (output, code) = unit.run(cmd)
                assert code == 0
                assert output == leader_uuid, "%s != %s" % (output,
                                                            leader_uuid)

    def test_pause_resume(self):
        '''
        Ensure pause/resume actions stop/start mysqld on units
        '''
        self.log.info('Testing pause/resume actions')
        self.log.info('Pausing service on first PXC unit')
        unit = self.d.sentry['percona-cluster'][0]
        assert self.is_mysqld_running(unit), 'mysql not running'
        assert self.utils.status_get(unit)[0] == "active"

        action_id = self.utils.run_action(unit, "pause")
        assert self.utils.wait_on_action(action_id), "Pause action failed."
        self.d.sentry.wait()

        # Note that is_mysqld_running will print an error message when
        # mysqld is not running. This is by design but it looks odd
        # in the output.
        assert not self.is_mysqld_running(unit=unit), \
            "mysqld is still running!"

        self.log.info('Resuming service on first PXC unit')
        assert self.utils.status_get(unit)[0] == "maintenance"
        action_id = self.utils.run_action(unit, "resume")
        assert self.utils.wait_on_action(action_id), "Resume action failed"
        assert self.utils.status_get(unit)[0] == "active"
        assert self.is_mysqld_running(unit=unit), \
            "mysqld not running after resume."
        self._auto_wait_for_status()

    def test_kill_master(self):
        '''
        Ensure that killing mysqld on the master unit results
        in a VIP failover
        '''
        self.log.info('Testing failover of master unit on mysqld failure')
        # we are going to kill the master
        old_master = self.master_unit
        self.log.info(
            'kill -9 mysqld on {}'.format(self.master_unit.info['unit_name'])
        )
        self.master_unit.run('sudo killall -9 mysqld')

        self.log.info('looking for the new master')
        i = 0
        changed = False
        while i < 10 and not changed:
            i += 1
            time.sleep(5)  # give some time to pacemaker to react
            new_master = self.find_master(ha=self.ha)

            if (new_master and new_master.info['unit_name'] !=
                    old_master.info['unit_name']):
                self.log.info(
                    'New master unit detected'
                    ' on {}'.format(new_master.info['unit_name'])
                )
                changed = True

        assert changed, "The master didn't change"
        assert self.is_port_open(address=self.vip), 'cannot connect to vip'

    def test_change_root_password(self):
        """
        Change root password and verify the change was effectively applied.
        """
        new_root_passwd = 'openstack'

        u = self.master_unit
        root_password = PXC_ROOT_PASSWD
        cmd = "mysql -uroot -p{} -e\"select 1;\" ".format(root_password)
        output, code = u.run(cmd)
        assert code == 0, output

        self.d.configure('percona-cluster',
                         {'root-password': new_root_passwd})
        time.sleep(5)  # give some time to the unit to start the hook
        self.d.sentry.wait()  # wait until the hook finishes

        # try to connect using the new root password
        cmd = "mysql -uroot -p{} -e\"select 1;\" ".format(new_root_passwd)
        output, code = u.run(cmd)
        assert code == 0, output

    def find_master(self, ha=True):
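        """Return the first percona-cluster unit when ha is False, otherwise
        the unit that currently holds the VIP; return None if no unit holds
        it.
        """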
        for unit in self.d.sentry['percona-cluster']:
            if not ha:
                return unit

            # is the vip running here?
            output, code = unit.run('sudo ip a | grep "inet %s/"' % self.vip)
            self.log.info("Checking {}".format(unit.info['unit_name']))
            self.log.debug(output)
            if code == 0:
                self.log.info('vip ({}) running in {}'.format(
                    self.vip,
                    unit.info['unit_name'])
                )
                return unit

    def get_pcmkr_resources(self, unit=None):
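        """Return the list of pacemaker resources reported by
        'crm_resource -l' on the unit (defaults to the master unit).
        """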
        if unit:
            u = unit
        else:
            u = self.master_unit

        output, code = u.run('sudo crm_resource -l')
        assert code == 0, 'could not get "crm resource list"'

        return output.split('\n')

    def is_mysqld_running(self, unit=None):
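        """Return True if a mysqld process is running on the unit (defaults
        to the master unit), based on 'pidof mysqld'.
        """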
        if unit:
            u = unit
        else:
            u = self.master_unit

        _, code = u.run('pidof mysqld')
        if code != 0:
            self.log.debug("command returned non-zero '%s'" % (code))
            return False

        return True

    def get_wsrep_value(self, attr, unit=None):
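        """Return the value of the given wsrep status variable on the unit
        (defaults to the master unit), or an empty string on failure.
        """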
        if unit:
            u = unit
        else:
            u = self.master_unit

        root_password, _ = u.run('leader-get root-password')
        cmd = ("mysql -uroot -p{} -e\"show status like '{}';\"| "
               "grep {}".format(root_password, attr, attr))
        output, code = u.run(cmd)
        if code != 0:
            self.log.debug("command returned non-zero '%s'" % (code))
            return ""

        value = re.search(r"^.+?\s+(.+)", output).group(1)
        self.log.info("%s = %s" % (attr, value))
        return value

    def is_pxc_bootstrapped(self, unit=None):
        value = self.get_wsrep_value('wsrep_ready', unit)
        return value.lower() in ['on', 'ready']

    def get_cluster_size(self, unit=None):
        return self.get_wsrep_value('wsrep_cluster_size', unit)

    def is_port_open(self, unit=None, port='3306', address=None):
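        """Return True if a TCP connection can be opened to the given port
        on the unit's public address, or on an explicit address (e.g. the
        VIP).
        """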
        if unit:
            addr = unit.info['public-address']
        elif address:
            addr = address
        else:
            raise Exception('Please provide a unit or address')

        try:
            telnetlib.Telnet(addr, port)
            return True
        except socket.error as e:
            if e.errno == 113:
                self.log.error("could not connect to %s:%s" % (addr, port))
            if e.errno == 111:
                self.log.error("connection refused connecting"
                               " to %s:%s" % (addr, port))
            return False

    def resolve_cnf_file(self):
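        """Return the mysqld configuration file path for the deployed
        OpenStack release.
        """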
        if self._get_openstack_release() < self.xenial_mitaka:
            return '/etc/mysql/my.cnf'
        else:
            return '/etc/mysql/percona-xtradb-cluster.conf.d/mysqld.cnf'

    def test_restart_on_config_change(self):
        """Verify that the specified services are restarted when the
        config is changed."""
        sentry = self.d.sentry['percona-cluster'][0]
        juju_service = 'percona-cluster'

        # Expected default and alternate values
        set_default = {'peer-timeout': 'PT3S'}
        set_alternate = {'peer-timeout': 'PT15S'}

        # Config file affected by juju set config change
        conf_file = self.resolve_cnf_file()

        # Services which are expected to restart upon config change
        services = {
            'mysqld': conf_file,
        }

        # Make config change, check for service restarts
        self.utils.log.debug('Making config change on {}...'
                             .format(juju_service))
        mtime = self.utils.get_sentry_time(sentry)
        self.d.configure(juju_service, set_alternate)
        self._auto_wait_for_status()

        sleep_time = 40
        for s, conf_file in services.items():
            self.utils.log.debug("Checking that service restarted: {}"
                                 .format(s))
            if not self.utils.validate_service_config_changed(
                    sentry, mtime, s, conf_file, retry_count=5,
                    retry_sleep_time=sleep_time,
                    sleep_time=sleep_time):
                self.d.configure(juju_service, set_default)
                msg = "service {} didn't restart after config change".format(s)
                amulet.raise_status(amulet.FAIL, msg=msg)
            sleep_time = 0

        self.d.configure(juju_service, set_default)
        self._auto_wait_for_status()