402 lines
14 KiB
Python
402 lines
14 KiB
Python
# basic deployment test class for percona-xtradb-cluster
|
|
|
|
import amulet
|
|
import re
|
|
import os
|
|
import socket
|
|
import time
|
|
import telnetlib
|
|
import yaml
|
|
from charmhelpers.contrib.openstack.amulet.deployment import (
|
|
OpenStackAmuletDeployment
|
|
)
|
|
from charmhelpers.contrib.amulet.utils import AmuletUtils
|
|
|
|
PXC_ROOT_PASSWD = 'ubuntu'
|
|
|
|
|
|
class BasicDeployment(OpenStackAmuletDeployment):
    """Basic amulet deployment test for percona-xtradb-cluster.

    Deploys percona-cluster (optionally with hacluster for HA) and runs a
    suite of functional checks against the resulting units.
    """

    # Shared helper for logging, juju actions and workload-status queries.
    utils = AmuletUtils()

    def __init__(self, vip=None, units=1, series="trusty", openstack=None,
                 source=None, stable=False):
        """Prepare the deployment description.

        :param vip: virtual IP for HA deployments; when omitted it is read
            from the AMULET_OS_VIP env var or a local.yaml file.
        :param units: number of percona-cluster units; more than one unit
            implies an HA (hacluster-managed) deployment.
        :param series: Ubuntu series to deploy on.
        :param openstack: OpenStack origin passed to the base deployment.
        :param source: software source passed to the base deployment.
        :param stable: whether to use the stable charm branches.
        """
        super(BasicDeployment, self).__init__(series, openstack, source,
                                              stable)
        self.units = units
        self.master_unit = None
        self.vip = None
        self.ha = False
        if units > 1:
            self.ha = True
            # Resolve the VIP in order of precedence: explicit argument,
            # environment variable, local.yaml next to the test.
            # NOTE(review): indentation was lost in this copy — the VIP
            # resolution is assumed to apply only to HA deployments, since
            # a single unit needs no VIP; confirm against upstream.
            if vip:
                self.vip = vip
            elif 'AMULET_OS_VIP' in os.environ:
                self.vip = os.environ.get('AMULET_OS_VIP')
            elif os.path.isfile('local.yaml'):
                with open('local.yaml', 'rb') as f:
                    self.cfg = yaml.safe_load(f.read())

                self.vip = self.cfg.get('vip')
            else:
                # Without a VIP an HA deployment cannot be validated; skip.
                amulet.raise_status(amulet.SKIP,
                                    ("Please set the vip in local.yaml or "
                                     "env var AMULET_OS_VIP to run this test "
                                     "suite"))
        self.log = self.utils.get_logger()
|
|
|
|
def _add_services(self):
    """Register the services under test.

    percona-cluster is deployed from the local charm; supporting services
    (hacluster, only for HA deployments) come from lp branches compatible
    with the local charm (e.g. stable or next).
    """
    pxc = {'name': 'percona-cluster',
           'units': self.units}
    extras = []
    if self.units > 1 and self.ha:
        extras = [{'name': 'hacluster'}]

    super(BasicDeployment, self)._add_services(pxc, extras)
|
|
|
|
def _add_relations(self):
    """Relate percona-cluster to hacluster when deploying in HA mode."""
    if self.units > 1 and self.ha:
        super(BasicDeployment, self)._add_relations(
            {'percona-cluster:ha': 'hacluster:ha'})
|
|
|
|
def _get_configs(self):
    """Build the per-service charm configuration.

    :returns: dict mapping service name to its config options; the
        hacluster entry is present only for HA (multi-unit) deployments.
    """
    configs = {}

    if self.units > 1 and self.ha:
        # corosync_key is a fixed test key, safe to embed here.
        configs['hacluster'] = {
            'debug': True,
            'cluster_count': str(self.units),
            'corosync_key': ('xZP7GDWV0e8Qs0GxWThXirNNYlScgi3sRTdZk/IXKD'
                             'qkNFcwdCWfRQnqrHU/6mb6sz6OIoZzX2MtfMQIDcXu'
                             'PqQyvKuv7YbRyGHmQwAWDUA4ed759VWAO39kHkfWp9'
                             'y5RRk/wcHakTcWYMwm70upDGJEP00YT3xem3NQy27A'
                             'C1w='),
        }

    configs['percona-cluster'] = {
        'min-cluster-size': self.units,
        'vip': self.vip,
        'root-password': PXC_ROOT_PASSWD,
    }

    return configs
|
|
|
|
def _configure_services(self):
    """Apply the service configurations built by _get_configs()."""
    super(BasicDeployment, self)._configure_services(self._get_configs())
|
|
|
|
def run(self):
    """Build, deploy and then exercise the full test suite."""
    # Deployment phases must run in this order: services, relations,
    # config, then the actual juju deploy.
    self._add_services()
    self._add_relations()
    self._configure_services()
    self._deploy()
    # Block until all units settle before running the checks.
    self.d.sentry.wait()
    self.test_deployment()
|
|
|
|
def test_deployment(self):
    '''Top level test function executor.

    Runs each functional check in sequence; the master-failover check
    only makes sense for HA (multi-unit) deployments.
    '''
    self.test_pacemaker()
    self.test_pxc_running()
    self.test_bootstrapped_and_clustered()
    self.test_bootstrap_uuid_set_in_the_relation()
    self.test_restart_on_config_change()
    self.test_pause_resume()
    if self.ha:
        self.test_kill_master()
|
|
|
|
def test_pacemaker(self):
|
|
'''
|
|
Ensure that pacemaker and corosync are correctly configured in
|
|
clustered deployments.
|
|
|
|
side effect: self.master_unit should be set after execution
|
|
'''
|
|
|
|
if self.units > 1 and self.ha:
|
|
i = 0
|
|
while i < 30 and not self.master_unit:
|
|
self.master_unit = self.find_master(ha=self.ha)
|
|
i += 1
|
|
time.sleep(10)
|
|
|
|
msg = 'percona-cluster vip not found'
|
|
assert self.master_unit is not None, msg
|
|
|
|
_, code = self.master_unit.run('sudo crm_verify --live-check')
|
|
assert code == 0, "'crm_verify --live-check' failed"
|
|
|
|
resources = ['res_mysql_vip']
|
|
resources += ['res_mysql_monitor:%d' %
|
|
m for m in range(self.units)]
|
|
|
|
assert sorted(self.get_pcmkr_resources()) == sorted(resources)
|
|
else:
|
|
self.master_unit = self.find_master(ha=self.ha)
|
|
|
|
def test_pxc_running(self):
|
|
'''
|
|
Ensure PXC is running on all units
|
|
'''
|
|
for unit in self.d.sentry['percona-cluster']:
|
|
assert self.is_mysqld_running(unit), 'mysql not running: %s' % unit
|
|
|
|
def test_bootstrapped_and_clustered(self):
|
|
'''
|
|
Ensure PXC is bootstrapped and that peer units are clustered
|
|
'''
|
|
self.log.info('Ensuring PXC is bootstrapped')
|
|
msg = "Percona cluster failed to bootstrap"
|
|
assert self.is_pxc_bootstrapped(), msg
|
|
|
|
self.log.info('Checking PXC cluster size == {}'.format(self.units))
|
|
got = int(self.get_cluster_size())
|
|
msg = ("Percona cluster unexpected size"
|
|
" (wanted=%s, got=%s)" % (self.units, got))
|
|
assert got == self.units, msg
|
|
|
|
def test_bootstrap_uuid_set_in_the_relation(self):
|
|
"""Verify that the bootstrap-uuid attribute was set by the leader and
|
|
all the peers where notified.
|
|
"""
|
|
(leader_uuid, code) = self.master_unit.run("leader-get bootstrap-uuid")
|
|
assert leader_uuid
|
|
|
|
cmd_rel_get = ("relation-get -r `relation-ids cluster` "
|
|
"bootstrap-uuid %s")
|
|
units = self.d.sentry['percona-cluster']
|
|
for unit in units:
|
|
for peer in units:
|
|
cmd = cmd_rel_get % peer.info['unit_name']
|
|
self.log.debug(cmd)
|
|
(output, code) = unit.run(cmd)
|
|
assert code == 0
|
|
assert output == leader_uuid, "%s != %s" % (output,
|
|
leader_uuid)
|
|
|
|
def test_pause_resume(self):
    '''
    Ensure pause/resume actions stop/start mysqld on units
    '''
    self.log.info('Testing pause/resume actions')
    self.log.info('Pausing service on first PXC unit')
    unit = self.d.sentry['percona-cluster'][0]
    # Preconditions: mysqld up and the unit's workload status "active".
    assert self.is_mysqld_running(unit), 'mysql not running'
    assert self.utils.status_get(unit)[0] == "active"

    action_id = self.utils.run_action(unit, "pause")
    assert self.utils.wait_on_action(action_id), "Pause action failed."
    self.d.sentry.wait()

    # Note that is_mysqld_running will print an error message when
    # mysqld is not running. This is by design but it looks odd
    # in the output.
    assert not self.is_mysqld_running(unit=unit), \
        "mysqld is still running!"

    self.log.info('Resuming service on first PXC unit')
    # A paused unit reports "maintenance" workload status.
    assert self.utils.status_get(unit)[0] == "maintenance"
    action_id = self.utils.run_action(unit, "resume")
    assert self.utils.wait_on_action(action_id), "Resume action failed"
    assert self.utils.status_get(unit)[0] == "active"
    assert self.is_mysqld_running(unit=unit), \
        "mysqld not running after resume."
    # Let the deployment settle back to the expected status.
    self._auto_wait_for_status()
|
|
|
|
def test_kill_master(self):
    '''
    Ensure that killing the mysqld on the master unit results
    in a VIP failover
    '''
    self.log.info('Testing failover of master unit on mysqld failure')
    # we are going to kill the master
    old_master = self.master_unit
    self.log.info(
        'kill -9 mysqld on {}'.format(self.master_unit.info['unit_name'])
    )
    self.master_unit.run('sudo killall -9 mysqld')

    self.log.info('looking for the new master')
    changed = False
    for _ in range(10):
        time.sleep(5)  # give some time to pacemaker to react
        candidate = self.find_master(ha=self.ha)

        if (candidate and candidate.info['unit_name'] !=
                old_master.info['unit_name']):
            self.log.info(
                'New master unit detected'
                ' on {}'.format(candidate.info['unit_name'])
            )
            changed = True
            break

    assert changed, "The master didn't change"

    assert self.is_port_open(address=self.vip), 'cannot connect to vip'
|
|
|
|
def test_change_root_password(self):
    """
    Change root password and verify the change was effectively applied.

    Connects with the original password, sets a new one via charm config,
    waits for the config-changed hook, then reconnects with the new one.
    """

    new_root_passwd = 'openstack'

    u = self.master_unit
    # BUG FIX: PXC_ROOT_PASSWD is a plain string; the previous
    # `root_password, _ = PXC_ROOT_PASSWD` tuple-unpacked the 6-char
    # string into two names and raised ValueError at runtime.
    root_password = PXC_ROOT_PASSWD
    cmd = "mysql -uroot -p{} -e\"select 1;\" ".format(root_password)
    output, code = u.run(cmd)

    assert code == 0, output

    self.d.configure('percona-cluster', {'root-password': new_root_passwd})

    time.sleep(5)  # give some time to the unit to start the hook
    self.d.sentry.wait()  # wait until the hook finishes

    # try to connect using the new root password
    cmd = "mysql -uroot -p{} -e\"select 1;\" ".format(new_root_passwd)
    output, code = u.run(cmd)

    assert code == 0, output
|
|
|
|
def find_master(self, ha=True):
    """Return the unit that currently holds the VIP.

    In non-HA mode the first unit is returned; returns None when no
    unit holds the VIP.
    """
    for candidate in self.d.sentry['percona-cluster']:
        if not ha:
            return candidate

        # is the vip running here?
        output, code = candidate.run(
            'sudo ip a | grep "inet %s/"' % self.vip)
        self.log.info("Checking {}".format(candidate.info['unit_name']))
        self.log.debug(output)
        if code == 0:
            self.log.info('vip ({}) running in {}'.format(
                self.vip,
                candidate.info['unit_name'])
            )
            return candidate
|
|
|
|
def get_pcmkr_resources(self, unit=None):
    """Return the list of pacemaker resource names on the given unit
    (defaults to the master unit).
    """
    target = unit if unit else self.master_unit

    output, code = target.run('sudo crm_resource -l')

    assert code == 0, 'could not get "crm resource list"'

    return output.split('\n')
|
|
|
|
def is_mysqld_running(self, unit=None):
    """Return True when a mysqld process exists on the unit
    (defaults to the master unit).
    """
    target = unit if unit else self.master_unit

    _, code = target.run('pidof mysqld')
    if code == 0:
        return True

    self.log.debug("command returned non-zero '%s'" % (code))
    return False
|
|
|
|
def get_wsrep_value(self, attr, unit=None):
    """Query a wsrep status variable via mysql on the unit.

    :param attr: status variable name, e.g. 'wsrep_ready'.
    :param unit: unit to query; defaults to the master unit.
    :returns: the variable's value as a string, or '' when the query
        fails.
    """
    target = unit if unit else self.master_unit

    # Root password is held by the leader.
    root_password, _ = target.run('leader-get root-password')
    cmd = ("mysql -uroot -p{} -e\"show status like '{}';\"| "
           "grep {}".format(root_password, attr, attr))
    output, code = target.run(cmd)
    if code != 0:
        self.log.debug("command returned non-zero '%s'" % (code))
        return ""

    # Output is "<name><whitespace><value>"; capture the value column.
    value = re.search(r"^.+?\s+(.+)", output).group(1)
    self.log.info("%s = %s" % (attr, value))
    return value
|
|
|
|
def is_pxc_bootstrapped(self, unit=None):
    """Return True when wsrep reports the node ready ('on'/'ready')."""
    state = self.get_wsrep_value('wsrep_ready', unit)
    return state.lower() in ('on', 'ready')
|
|
|
|
def get_cluster_size(self, unit=None):
    """Return the wsrep_cluster_size status value (as a string)."""
    size = self.get_wsrep_value('wsrep_cluster_size', unit)
    return size
|
|
|
|
def is_port_open(self, unit=None, port='3306', address=None):
    """Return True when a TCP connection to the target succeeds.

    :param unit: sentry unit whose public address should be probed.
    :param port: TCP port to probe (default: mysql, 3306).
    :param address: explicit address to probe when no unit is given.
    :raises Exception: when neither unit nor address is provided.
    """
    if unit:
        addr = unit.info['public-address']
    elif address:
        addr = address
    else:
        raise Exception('Please provide a unit or address')

    try:
        # FIX: use socket.create_connection instead of telnetlib (which
        # is deprecated and removed in Python 3.13), and close the probe
        # connection instead of leaking the socket.
        conn = socket.create_connection((addr, port))
        conn.close()
        return True
    except socket.error as e:
        if e.errno == 113:  # EHOSTUNREACH
            self.log.error("could not connect to %s:%s" % (addr, port))
        if e.errno == 111:  # ECONNREFUSED
            self.log.error("connection refused connecting"
                           " to %s:%s" % (addr,
                                          port))
        return False
|
|
|
|
def resolve_cnf_file(self):
    """Return the mysqld config file path for the deployed release."""
    # Mitaka (on xenial) moved mysqld settings into the percona conf.d dir.
    if self._get_openstack_release() >= self.xenial_mitaka:
        return '/etc/mysql/percona-xtradb-cluster.conf.d/mysqld.cnf'
    return '/etc/mysql/my.cnf'
|
|
|
|
def test_restart_on_config_change(self):
|
|
"""Verify that the specified services are restarted when the
|
|
config is changed."""
|
|
|
|
sentry = self.d.sentry['percona-cluster'][0]
|
|
juju_service = 'percona-cluster'
|
|
|
|
# Expected default and alternate values
|
|
set_default = {'peer-timeout': 'PT3S'}
|
|
set_alternate = {'peer-timeout': 'PT15S'}
|
|
|
|
# Config file affected by juju set config change
|
|
conf_file = self.resolve_cnf_file()
|
|
|
|
# Services which are expected to restart upon config change
|
|
services = {
|
|
'mysqld': conf_file,
|
|
}
|
|
|
|
# Make config change, check for service restarts
|
|
self.utils.log.debug('Making config change on {}...'
|
|
.format(juju_service))
|
|
mtime = self.utils.get_sentry_time(sentry)
|
|
self.d.configure(juju_service, set_alternate)
|
|
self._auto_wait_for_status()
|
|
|
|
sleep_time = 40
|
|
for s, conf_file in services.iteritems():
|
|
self.utils.log.debug("Checking that service restarted: {}"
|
|
.format(s))
|
|
if not self.utils.validate_service_config_changed(
|
|
sentry, mtime, s, conf_file, retry_count=5,
|
|
retry_sleep_time=sleep_time,
|
|
sleep_time=sleep_time):
|
|
self.d.configure(juju_service, set_default)
|
|
msg = "service {} didn't restart after config change".format(s)
|
|
amulet.raise_status(amulet.FAIL, msg=msg)
|
|
sleep_time = 0
|
|
|
|
self.d.configure(juju_service, set_default)
|
|
self._auto_wait_for_status()
|