Merge "Series Upgrade"

This commit is contained in:
Zuul 2018-09-19 07:41:36 +00:00 committed by Gerrit Code Review
commit 47cde88fd7
12 changed files with 349 additions and 26 deletions


@ -1,5 +1,4 @@
Overview
========
# Overview
Percona XtraDB Cluster is a high availability and high scalability solution for
MySQL clustering. Percona XtraDB Cluster integrates Percona Server with the
@ -8,11 +7,9 @@ which enables you to create a cost-effective MySQL cluster.
This charm deploys Percona XtraDB Cluster onto Ubuntu.
Usage
=====
# Usage
Deployment
----------
## Deployment
To deploy this charm:
@ -35,8 +32,7 @@ The root password for mysql can be retrieved using the following command:
This is only usable from within one of the units within the deployment
(access to root is restricted to localhost only).
Memory Configuration
-------------------
## Memory Configuration
Percona Cluster is extremely memory sensitive. Setting memory values too low
will give poor performance. Setting them too high will create problems that are
@ -76,8 +72,7 @@ requirements and resources available.
[2] http://www.mysqlcalculator.com/
HA/Clustering
-------------
## HA/Clustering
When more than one unit of the charm is deployed with the hacluster charm
the percona charm will bring up an Active/Active cluster. The process of
@ -129,8 +124,7 @@ If both 'vip' and 'dns-ha' are set, as they are mutually exclusive
If 'dns-ha' is set and 'os-access-hostname' is not set
If the 'access' binding is not set and 'dns-ha' is set, consumers of the db may not be allowed to connect
Network Space support
---------------------
## Network Space support
This charm supports the use of Juju Network Spaces, allowing the charm to be bound
to network space configurations managed directly by Juju. This is only supported
@ -159,10 +153,77 @@ MySQL databases services from other charms.
**NOTE:** Existing deployments using the access-network configuration option will continue to function; this option is preferred over any network space binding provided for the 'shared-db' relation if set.
Limitations
============
# Limitations
Note that Percona XtraDB Cluster is not a 'scale-out' MySQL solution; reads
and writes are channelled through a single service unit and synchronously
replicated to other nodes in the cluster; reads/writes are as slow as the
slowest node you have in your deployment.
# Series Upgrade
## Procedure
1. Take a backup of all the databases
```sh
juju run-action mysql/N backup
```
* Get that backup off the mysql/N unit and store it somewhere safe.
```sh
juju scp -- -r mysql/N:/opt/backups/mysql /path/to/local/backup/dir
```
2. Pause all non-leader units and corresponding hacluster units.
The leader node will remain up for the time being. This is to ensure the leader has the latest sequence number and will be considered the most up to date by the cluster.
```sh
juju run-action hacluster/N pause
juju run-action percona-cluster/N pause
```
3. Prepare the leader node
```sh
juju upgrade-series prepare $MACHINE_NUMBER $SERIES
```
4. Administratively perform the upgrade.
* Run do-release-upgrade plus any further administrative steps required for the upgrade (see the sketch below).
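A minimal sketch of this step, assuming the machine is accessed over `juju ssh` (the machine number is deployment specific):
```sh
# run on the machine itself (e.g. after `juju ssh $MACHINE_NUMBER`)
sudo do-release-upgrade
```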
5. Reboot
6. Complete the series upgrade on the leader:
```sh
juju upgrade-series complete $MACHINE_NUMBER
```
7. Administratively validate that the leader node's database is up and running (see the sketch after the checks below)
* Connect to the database and check for expected data
* Review "SHOW GLOBAL STATUS;"
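A minimal sketch of these checks, assuming mysql/N is the leader unit and the root password has been retrieved as described in the Usage section:
```sh
# run on the leader unit (e.g. after `juju ssh mysql/N`)
mysql -u root -p -e "SHOW GLOBAL STATUS;"
```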
8. Upgrade the non-leader nodes one at a time, following the same pattern summarized below (an example sketch for a single unit follows the list):
* juju upgrade-series prepare $MACHINE_NUMBER $SERIES
* Administratively Upgrade
* Reboot
* juju upgrade-series complete $MACHINE_NUMBER
* Validate
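A minimal sketch for a single non-leader unit; the machine number and target series are placeholders for your deployment:
```sh
juju upgrade-series prepare $MACHINE_NUMBER $SERIES
# on the machine itself (e.g. via `juju ssh $MACHINE_NUMBER`):
#   sudo do-release-upgrade
#   sudo reboot
juju upgrade-series complete $MACHINE_NUMBER
# then validate the database as was done for the leader
```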
9. Finalize the upgrade
Run the following action on the leader node.
This action informs each node in the cluster that the upgrade process is complete cluster-wide.
It also updates the mysql configuration with all peers in the cluster.
```sh
juju run-action mysql/N complete-cluster-series-upgrade
```
10. Set the series on the application so that future units use the new series, and set the source origin
```sh
juju set-series percona-cluster xenial
juju config mysql source=distro
```
## Documentation
* https://www.percona.com/doc/percona-xtradb-cluster/LATEST/howtos/upgrade_guide.html
* https://www.percona.com/doc/percona-xtradb-cluster/5.6/upgrading_guide_55_56.html
* https://www.percona.com/blog/2014/09/01/galera-replication-how-to-recover-a-pxc-cluster/


@ -17,4 +17,10 @@ backup:
type: boolean
default: false
description: Make an incremental database backup
complete-cluster-series-upgrade:
description: |
Perform final operations post series upgrade. Inform all nodes in the
cluster that the upgrade is complete cluster-wide. Update configuration
with all peers for wsrep replication.
This action should be performed on the current leader. Note the leader may
have changed during the series upgrade process.


@ -12,6 +12,8 @@ from charmhelpers.core.hookenv import (
action_get,
action_set,
action_fail,
leader_set,
is_leader,
)
from charmhelpers.core.host import (
@ -47,6 +49,20 @@ def resume(args):
config_changed()
def complete_cluster_series_upgrade(args):
""" Complete the series upgrade process
After all nodes have been upgraded, this action is run to inform the whole
cluster the upgrade is done. Config files will be re-rendered with each
peer in the wsrep_cluster_address config.
"""
if is_leader():
# Unset cluster_series_upgrading
leader_set(cluster_series_upgrading="")
leader_set(cluster_series_upgrade_leader="")
config_changed()
def backup(args):
basedir = (action_get("basedir")).lower()
compress = action_get("compress")
@ -87,9 +103,11 @@ def backup(args):
action_fail("innobackupex failed, you should log on to the unit"
"and check the status of the database")
# A dictionary of all the defined actions to callables (which take
# parsed arguments).
ACTIONS = {"pause": pause, "resume": resume, "backup": backup}
ACTIONS = {"pause": pause, "resume": resume, "backup": backup,
"complete-cluster-series-upgrade": complete_cluster_series_upgrade}
def main(args):


@ -0,0 +1 @@
actions.py


@ -4,6 +4,7 @@ import sys
import json
import os
import socket
import subprocess
from charmhelpers.core.hookenv import (
Hooks, UnregisteredHookError,
@ -34,6 +35,7 @@ from charmhelpers.core.host import (
service_stop,
file_hash,
lsb_release,
mkdir,
CompareHostReleases,
)
from charmhelpers.core.templating import render
@ -69,6 +71,10 @@ from charmhelpers.contrib.hardening.harden import harden
from charmhelpers.contrib.hardening.mysql.checks import run_mysql_checks
from charmhelpers.contrib.openstack.utils import (
is_unit_paused_set,
is_unit_upgrading_set,
set_unit_upgrading,
clear_unit_upgrading,
clear_unit_paused,
)
from charmhelpers.contrib.openstack.ha.utils import (
update_dns_ha_resource_params,
@ -113,6 +119,9 @@ from percona_utils import (
get_server_id,
is_sufficient_peers,
set_ready_on_peers,
pause_unit_helper,
resume_unit_helper,
check_for_socket,
)
from charmhelpers.core.unitdata import kv
@ -129,6 +138,8 @@ RES_MONITOR_PARAMS = ('params user="sstuser" password="%(sstpass)s" '
INITIAL_CLIENT_UPDATE_KEY = 'initial_client_update_done'
MYSQL_SOCKET = "/var/run/mysqld/mysqld.sock"
def install_percona_xtradb_cluster():
'''Attempt PXC install based on seeding of passwords for users'''
@ -195,6 +206,7 @@ def render_config(hosts=None):
'performance_schema': config('performance-schema'),
'is_leader': is_leader(),
'server_id': get_server_id(),
'series_upgrade': is_unit_upgrading_set(),
}
if config('prefer-ipv6'):
@ -305,12 +317,121 @@ def update_client_db_relations():
kvstore.flush()
@hooks.hook('pre-series-upgrade')
def prepare():
# Use the pause feature to stop mysql for the duration of the upgrade
pause_unit_helper(register_configs())
# Set this unit to series upgrading
set_unit_upgrading()
# The leader will "bootstrap" with no wsrep peers
# Non-leaders will point only at the newly upgraded leader until the
# cluster series upgrade is completed.
# Set cluster_series_upgrading for the duration of the cluster series
# upgrade. This will be unset with the action
# complete-cluster-series-upgrade on the leader node.
hosts = []
if not leader_get('cluster_series_upgrade_leader'):
leader_set(cluster_series_upgrading=True)
leader_set(
cluster_series_upgrade_leader=get_relation_ip('cluster'))
else:
hosts = [leader_get('cluster_series_upgrade_leader')]
# Render config
render_config(hosts)
@hooks.hook('post-series-upgrade')
def series_upgrade():
# Set this unit to series upgrading
set_unit_upgrading()
# The leader will "bootstrap" with no wsrep peers
# Non-leaders will point only at the newly upgraded leader until the
# cluster series upgrade is completed.
# Set cluster_series_upgrading for the duration of the cluster series
# upgrade. This will be unset with the action
# complete-cluster-series-upgrade on the leader node.
if (leader_get('cluster_series_upgrade_leader') ==
get_relation_ip('cluster')):
hosts = []
else:
hosts = [leader_get('cluster_series_upgrade_leader')]
# New series after series upgrade and reboot
_release = lsb_release()['DISTRIB_CODENAME'].lower()
if _release == "xenial":
# Guarantee /var/run/mysqld exists
_dir = '/var/run/mysqld'
mkdir(_dir, owner="mysql", group="mysql", perms=0o755)
# Install new versions of the percona packages
apt_install(determine_packages())
service_stop("mysql")
if _release == "bionic":
render_config(hosts)
if _release == "xenial":
# Move the packaged version's empty DB out of the way.
cmd = ["mv", "/var/lib/percona-xtradb-cluster",
"/var/lib/percona-xtradb-cluster.dpkg"]
subprocess.check_call(cmd)
# Symlink the previous version's data to the new location
cmd = ["ln", "-s", "/var/lib/mysql", "/var/lib/percona-xtradb-cluster"]
subprocess.check_call(cmd)
# Start mysql temporarily with no wsrep for the upgrade
cmd = ["mysqld"]
if _release == "bionic":
cmd.append("--skip-grant-tables")
cmd.append("--user=mysql")
cmd.append("--wsrep-provider=none")
log("Starting mysqld --wsrep-provider='none' and waiting ...")
proc = subprocess.Popen(cmd, stderr=subprocess.PIPE)
# Wait for the mysql socket to exist
check_for_socket(MYSQL_SOCKET, exists=True)
# Execute the upgrade process
log("Running mysql_upgrade")
cmd = ['mysql_upgrade']
if _release == "xenial":
cmd.append('-p{}'.format(root_password()))
subprocess.check_call(cmd)
# Terminate the temporary mysql
proc.terminate()
# Wait for the mysql socket to be removed
check_for_socket(MYSQL_SOCKET, exists=False)
# Clear states
clear_unit_paused()
clear_unit_upgrading()
if _release == "xenial":
# Point at the correct my.cnf
cmd = ["update-alternatives", "--set", "my.cnf",
"/etc/mysql/percona-xtradb-cluster.cnf"]
subprocess.check_call(cmd)
# Render config
render_config(hosts)
resume_unit_helper(register_configs())
@hooks.hook('upgrade-charm')
@harden()
def upgrade():
if is_leader():
if is_unit_paused_set():
if is_unit_paused_set() or is_unit_upgrading_set():
log('Unit is paused or upgrading, skipping upgrade', level=INFO)
return
@ -350,17 +471,18 @@ def upgrade():
@harden()
def config_changed():
# if we are paused or upgrading, delay doing any config changed hooks.
# It is forced on the resume.
if is_unit_paused_set() or is_unit_upgrading_set():
log("Unit is paused or upgrading. Skipping config_changed", "WARN")
return
# It is critical that the installation is attempted first before any
# rendering of the configuration files occurs.
# install_percona_xtradb_cluster has the code to decide if this is the
# leader or if the leader is bootstrapped and therefore ready for install.
install_percona_xtradb_cluster()
# if we are paused, delay doing any config changed hooks. It is forced on
# the resume.
if is_unit_paused_set():
return
if config('prefer-ipv6'):
assert_charm_supports_ipv6()
@ -368,20 +490,34 @@ def config_changed():
leader_bootstrapped = is_leader_bootstrapped()
leader_ip = leader_get('leader-ip')
if is_leader():
# Cluster upgrade adds some complication
cluster_series_upgrading = leader_get("cluster_series_upgrading")
if cluster_series_upgrading:
leader = (leader_get('cluster_series_upgrade_leader') ==
get_relation_ip('cluster'))
leader_ip = leader_get('cluster_series_upgrade_leader')
else:
leader = is_leader()
leader_ip = leader_get('leader-ip')
if leader:
# If the cluster has not been fully bootstrapped once yet, use an empty
# hosts list to avoid restarting the leader node's mysqld during
# cluster buildup.
# After the cluster has bootstrapped at least one time, it is much
# less likely to have restart collisions. It is then safe to use the
# full hosts list and have the leader node's mysqld restart.
if not clustered_once():
# Empty hosts if cluster_series_upgrading
if not clustered_once() or cluster_series_upgrading:
hosts = []
log("Leader unit - bootstrap required=%s" % (not leader_bootstrapped),
DEBUG)
render_config_restart_on_changed(hosts,
bootstrap=not leader_bootstrapped)
elif leader_bootstrapped and is_sufficient_peers():
elif (leader_bootstrapped and
is_sufficient_peers() and not
cluster_series_upgrading):
# Skip if cluster_series_upgrading
# Speed up cluster process by bootstrapping when the leader has
# bootstrapped if we have expected number of peers
if leader_ip not in hosts:

View File

@ -7,6 +7,7 @@ import os
import shutil
import uuid
from functools import partial
import time
from charmhelpers.core.decorators import retry_on_exception
from charmhelpers.core.host import (
@ -63,6 +64,7 @@ from charmhelpers.contrib.openstack.utils import (
pause_unit,
resume_unit,
is_unit_paused_set,
is_unit_upgrading_set,
)
# NOTE: python-mysqldb is installed by charmhelpers.contrib.database.mysql so
@ -664,6 +666,9 @@ def charm_check_func():
@returns (status, message) - tuple of strings if an issue
"""
if is_unit_upgrading_set():
# Avoid looping through attempting to determine cluster_in_sync
return ("blocked", "Unit upgrading.")
@retry_on_exception(num_retries=10,
base_delay=2,
@ -1095,3 +1100,26 @@ def set_ready_on_peers():
"""
for relid in relation_ids('cluster'):
relation_set(relation_id=relid, ready=True)
def check_for_socket(file_name, exists=True, sleep=10, attempts=12):
"""Check that a socket file exists or does not exist.
:file_name: str File name
:exists: bool Whether to check that the file exists or does not exist
:sleep: int Sleep time in seconds between attempts
:attempts: int Number of attempts before raising an exception
:returns: None
:raises: Exception if the maximum number of attempts is reached
"""
for i in range(attempts):
if os.path.exists(file_name) == exists:
return
else:
log("{} file is not yet ihe correct state retrying. "
"Check for exists={}".format(file_name, exists),
DEBUG)
time.sleep(sleep)
# If we get here throw an exception
raise Exception("Socket {} not found after {} attempts."
.format(file_name, attempts))

hooks/post-series-upgrade Symbolic link

@ -0,0 +1 @@
percona_hooks.py

hooks/pre-series-upgrade Symbolic link

@ -0,0 +1 @@
percona_hooks.py


@ -115,3 +115,17 @@ wsrep_retry_autocommit = 100
{% endif %}
!includedir /etc/mysql/conf.d/
{% if series_upgrade %}
# Required for compatibility with galera-2
# If other options are already present in wsrep_provider_options, append socket.checksum=1 to them, e.g.: "gmcast.listen_addr=tcp://127.0.0.1:15010; socket.checksum=1"
wsrep_provider_options="socket.checksum=1"
# Required for replication compatibility
log_bin_use_v1_row_events=1
avoid_temporal_upgrade=ON # Available in 5.6.24 and up
gtid_mode=0
binlog_checksum=NONE
# Required under certain conditions
read_only=ON
{% endif %}


@ -45,6 +45,28 @@ class ResumeTestCase(CharmTestCase):
config_changed.assert_called_once_with()
class CompleteClusterSeriesUpgrade(CharmTestCase):
def setUp(self):
super(CompleteClusterSeriesUpgrade, self).setUp(
actions, ["config_changed", "is_leader", "leader_set"])
def test_leader_complete_series_upgrade(self):
self.is_leader.return_value = True
calls = [mock.call(cluster_series_upgrading=""),
mock.call(cluster_series_upgrade_leader="")]
actions.complete_cluster_series_upgrade([])
self.leader_set.assert_has_calls(calls)
self.config_changed.assert_called_once_with()
def test_non_leader_complete_series_upgrade(self):
self.is_leader.return_value = False
actions.complete_cluster_series_upgrade([])
self.leader_set.assert_not_called()
self.config_changed.assert_called_once_with()
class MainTestCase(CharmTestCase):
def setUp(self):


@ -310,21 +310,29 @@ class TestConfigChanged(CharmTestCase):
'get_cluster_hosts',
'leader_get',
'set_ready_on_peers',
'is_unit_paused_set',
'is_unit_upgrading_set',
]
def setUp(self):
CharmTestCase.setUp(self, hooks, self.TO_PATCH)
self.config.side_effect = self.test_config.get
self.is_unit_paused_set.return_value = False
self.is_unit_upgrading_set.return_value = False
self.is_leader.return_value = False
self.is_leader_bootstrapped.return_value = False
self.is_bootstrapped.return_value = False
self.clustered_once.return_value = False
self.relation_ids.return_value = []
self.is_relation_made.return_value = False
self.leader_get.return_value = '10.10.10.10'
self.get_cluster_hosts.return_value = []
def _leader_get(key):
settings = {'leader-ip': '10.10.10.10',
'cluster_series_upgrading': False}
return settings.get(key)
self.leader_get.side_effect = _leader_get
def test_config_changed_open_port(self):
'''Ensure open_port is called with MySQL default port'''
self.is_leader_bootstrapped.return_value = True


@ -377,6 +377,33 @@ class UtilsTests(CharmTestCase):
self.config.side_effect = lambda key: _config.get(key)
self.assertEqual(percona_utils.get_min_cluster_size(), 3)
@mock.patch("percona_utils.time")
@mock.patch("percona_utils.os")
def test_check_for_socket(self, _os, _time):
# Socket exists checking for exists
_os.path.exists.return_value = True
percona_utils.check_for_socket("filename", exists=True)
_time.sleep.assert_not_called()
# Socket does not exist checking for exists
_os.path.exists.return_value = False
with self.assertRaises(Exception):
percona_utils.check_for_socket("filename", exists=True)
_time.sleep.assert_called_with(10)
_time.reset_mock()
# Socket does not exist checking for not exists
_os.path.exists.return_value = False
percona_utils.check_for_socket("filename", exists=False)
_time.sleep.assert_not_called()
# Socket exists checking for not exists
_os.path.exists.return_value = True
with self.assertRaises(Exception):
percona_utils.check_for_socket("filename", exists=False)
_time.sleep.assert_called_with(10)
class UtilsTestsStatus(CharmTestCase):