charm-percona-cluster/tests/basic_deployment.py

300 lines
10 KiB
Python

# basic deployment test class for percona-xtradb-cluster
import amulet
import re
import os
import socket
import time
import telnetlib
import yaml
from charmhelpers.contrib.openstack.amulet.deployment import (
OpenStackAmuletDeployment
)
from charmhelpers.contrib.amulet.utils import AmuletUtils
class BasicDeployment(OpenStackAmuletDeployment):
utils = AmuletUtils()
def __init__(self, vip=None, units=1, series="trusty", openstack=None,
source=None, stable=False):
super(BasicDeployment, self).__init__(series, openstack, source,
stable)
self.units = units
self.master_unit = None
self.vip = None
if units > 1:
if vip:
self.vip = vip
elif 'AMULET_OS_VIP' in os.environ:
self.vip = os.environ.get('AMULET_OS_VIP')
elif os.path.isfile('local.yaml'):
with open('local.yaml', 'rb') as f:
self.cfg = yaml.safe_load(f.read())
self.vip = self.cfg.get('vip')
else:
amulet.raise_status(amulet.SKIP,
("Please set the vip in local.yaml or "
"env var AMULET_OS_VIP to run this test "
"suite"))
self.log = self.utils.get_logger()
def _add_services(self):
"""Add services
Add the services that we're testing, where percona-cluster is local,
and the rest of the service are from lp branches that are
compatible with the local charm (e.g. stable or next).
"""
this_service = {'name': 'percona-cluster',
'units': self.units}
other_services = []
if self.units > 1:
other_services.append({'name': 'hacluster'})
super(BasicDeployment, self)._add_services(this_service,
other_services)
def _add_relations(self):
"""Add all of the relations for the services."""
if self.units > 1:
relations = {'percona-cluster:ha': 'hacluster:ha'}
super(BasicDeployment, self)._add_relations(relations)
def _get_configs(self):
"""Configure all of the services."""
cfg_percona = {'sst-password': 'ubuntu',
'root-password': 't00r',
'min-cluster-size': self.units,
'vip': self.vip}
cfg_ha = {'debug': True,
'corosync_key': ('xZP7GDWV0e8Qs0GxWThXirNNYlScgi3sRTdZk/IXKD'
'qkNFcwdCWfRQnqrHU/6mb6sz6OIoZzX2MtfMQIDcXu'
'PqQyvKuv7YbRyGHmQwAWDUA4ed759VWAO39kHkfWp9'
'y5RRk/wcHakTcWYMwm70upDGJEP00YT3xem3NQy27A'
'C1w=')}
configs = {}
if self.units > 1:
cfg_ha['cluster_count'] = str(self.units)
configs['hacluster'] = cfg_ha
configs['percona-cluster'] = cfg_percona
return configs
def _configure_services(self):
super(BasicDeployment, self)._configure_services(self._get_configs())
def run(self):
self._add_services()
self._add_relations()
self._configure_services()
self._deploy()
self.d.sentry.wait()
self.test_deployment()
def test_deployment(self):
'''Top level test function executor'''
self.test_pacemaker()
self.test_pxc_running()
self.test_bootstrapped_and_clustered()
self.test_pause_resume()
self.test_kill_master()
def test_pacemaker(self):
'''
Ensure that pacemaker and corosync are correctly configured in
clustered deployments.
side effect: self.master_unit should be set after execution
'''
if self.units > 1:
i = 0
while i < 30 and not self.master_unit:
self.master_unit = self.find_master()
i += 1
time.sleep(10)
msg = 'percona-cluster vip not found'
assert self.master_unit is not None, msg
_, code = self.master_unit.run('sudo crm_verify --live-check')
assert code == 0, "'crm_verify --live-check' failed"
resources = ['res_mysql_vip']
resources += ['res_mysql_monitor:%d' %
m for m in range(self.units)]
assert sorted(self.get_pcmkr_resources()) == sorted(resources)
else:
self.master_unit = self.find_master(ha=False)
def test_pxc_running(self):
'''
Ensure PXC is running on all units
'''
for unit in self.d.sentry['percona-cluster']:
assert self.is_mysqld_running(unit), 'mysql not running: %s' % unit
def test_bootstrapped_and_clustered(self):
'''
Ensure PXC is bootstrapped and that peer units are clustered
'''
self.log.info('Ensuring PXC is bootstrapped')
msg = "Percona cluster failed to bootstrap"
assert self.is_pxc_bootstrapped(), msg
self.log.info('Checking PXC cluster size == {}'.format(self.units))
got = int(self.get_cluster_size())
msg = ("Percona cluster unexpected size"
" (wanted=%s, got=%s)" % (self.units, got))
assert got == self.units, msg
def test_pause_resume(self):
'''
Ensure pasue/resume actions stop/start mysqld on units
'''
self.log.info('Testing pause/resume actions')
self.log.info('Pausing service on first PXC unit')
unit = self.d.sentry['percona-cluster'][0]
assert self.is_mysqld_running(unit), 'mysql not running'
assert self.utils.status_get(unit)[0] == "active"
action_id = self.utils.run_action(unit, "pause")
assert self.utils.wait_on_action(action_id), "Pause action failed."
# Note that is_mysqld_running will print an error message when
# mysqld is not running. This is by design but it looks odd
# in the output.
assert not self.is_mysqld_running(unit=unit), \
"mysqld is still running!"
self.log.info('Resuming service on first PXC unit')
assert self.utils.status_get(unit)[0] == "maintenance"
action_id = self.utils.run_action(unit, "resume")
assert self.utils.wait_on_action(action_id), "Resume action failed"
assert self.utils.status_get(unit)[0] == "active"
assert self.is_mysqld_running(unit=unit), \
"mysqld not running after resume."
def test_kill_master(self):
'''
Ensure that killing the mysqld on the master unit results
in a VIP failover
'''
self.log.info('Testing failover of master unit on mysqld failure')
# we are going to kill the master
old_master = self.master_unit
self.log.info(
'kill -9 mysqld on {}'.format(self.master_unit.info['unit_name'])
)
self.master_unit.run('sudo killall -9 mysqld')
self.log.info('looking for the new master')
i = 0
changed = False
while i < 10 and not changed:
i += 1
time.sleep(5) # give some time to pacemaker to react
new_master = self.find_master()
if (new_master and new_master.info['unit_name'] !=
old_master.info['unit_name']):
self.log.info(
'New master unit detected'
' on {}'.format(new_master.info['unit_name'])
)
changed = True
assert changed, "The master didn't change"
assert self.is_port_open(address=self.vip), 'cannot connect to vip'
def find_master(self, ha=True):
for unit in self.d.sentry['percona-cluster']:
if not ha:
return unit
# is the vip running here?
output, code = unit.run('sudo ip a | grep "inet %s/"' % self.vip)
self.log.info("Checking {}".format(unit.info['unit_name']))
self.log.debug(output)
if code == 0:
self.log.info('vip ({}) running in {}'.format(
self.vip,
unit.info['unit_name'])
)
return unit
def get_pcmkr_resources(self, unit=None):
if unit:
u = unit
else:
u = self.master_unit
output, code = u.run('sudo crm_resource -l')
assert code == 0, 'could not get "crm resource list"'
return output.split('\n')
def is_mysqld_running(self, unit=None):
if unit:
u = unit
else:
u = self.master_unit
_, code = u.run('pidof mysqld')
if code != 0:
self.log.debug("command returned non-zero '%s'" % (code))
return False
return True
def get_wsrep_value(self, attr, unit=None):
if unit:
u = unit
else:
u = self.master_unit
cmd = ("mysql -uroot -pt00r -e\"show status like '%s';\"| "
"grep %s" % (attr, attr))
output, code = u.run(cmd)
if code != 0:
self.log.debug("command returned non-zero '%s'" % (code))
return ""
value = re.search(r"^.+?\s+(.+)", output).group(1)
self.log.info("%s = %s" % (attr, value))
return value
def is_pxc_bootstrapped(self, unit=None):
value = self.get_wsrep_value('wsrep_ready', unit)
return value.lower() in ['on', 'ready']
def get_cluster_size(self, unit=None):
return self.get_wsrep_value('wsrep_cluster_size', unit)
def is_port_open(self, unit=None, port='3306', address=None):
if unit:
addr = unit.info['public-address']
elif address:
addr = address
else:
raise Exception('Please provide a unit or address')
try:
telnetlib.Telnet(addr, port)
return True
except socket.error as e:
if e.errno == 113:
self.log.error("could not connect to %s:%s" % (addr, port))
if e.errno == 111:
self.log.error("connection refused connecting"
" to %s:%s" % (addr,
port))
return False