From fe131a0aa67517578707cdc1f4110dd245f87cbe Mon Sep 17 00:00:00 2001 From: David Ames Date: Wed, 1 Aug 2018 23:33:36 +0000 Subject: [PATCH] Series Upgrade Implement the series-upgrade feature allowing to move between Ubuntu series. Change-Id: If38bf1767c8e0c9242071140535b44e12c9f9759 --- README.md | 89 +++++++++++--- actions.yaml | 8 +- actions/actions.py | 20 ++- actions/complete-cluster-series-upgrade | 1 + hooks/percona_hooks.py | 154 ++++++++++++++++++++++-- hooks/percona_utils.py | 28 +++++ hooks/post-series-upgrade | 1 + hooks/pre-series-upgrade | 1 + templates/my.cnf | 14 +++ unit_tests/test_actions.py | 22 ++++ unit_tests/test_percona_hooks.py | 10 +- unit_tests/test_percona_utils.py | 27 +++++ 12 files changed, 349 insertions(+), 26 deletions(-) create mode 120000 actions/complete-cluster-series-upgrade create mode 120000 hooks/post-series-upgrade create mode 120000 hooks/pre-series-upgrade diff --git a/README.md b/README.md index e1f67e1..7634252 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -Overview -======== +# Overview Percona XtraDB Cluster is a high availability and high scalability solution for MySQL clustering. Percona XtraDB Cluster integrates Percona Server with the @@ -8,11 +7,9 @@ which enables you to create a cost-effective MySQL cluster. This charm deploys Percona XtraDB Cluster onto Ubuntu. -Usage -===== +# Usage -Deployment ----------- +## Deployment To deploy this charm: @@ -35,8 +32,7 @@ The root password for mysql can be retrieved using the following command: This is only usable from within one of the units within the deployment (access to root is restricted to localhost only). -Memory Configuration -------------------- +## Memory Configuration Percona Cluster is extremely memory sensitive. Setting memory values too low will give poor performance. Setting them too high will create problems that are @@ -76,8 +72,7 @@ requirements and resources available. 
[2] http://www.mysqlcalculator.com/ -HA/Clustering ------------- +## HA/Clustering When more than one unit of the charm is deployed with the hacluster charm the percona charm will bring up an Active/Active cluster. The process of If both 'vip' and 'dns-ha' are set, as they are mutually exclusive If 'dns-ha' is set and 'os-access-hostname' is not set If the 'access' binding is not set and 'dns-ha' is set, consumers of the db may not be allowed to connect -Network Space support --------------------- +## Network Space support This charm supports the use of Juju Network Spaces, allowing the charm to be bound to network space configurations managed directly by Juju. This is only supported MySQL databases services from other charms. **NOTE:** Existing deployments using the access-network configuration option will continue to function; this option is preferred over any network space binding provided for the 'shared-db' relation if set. -Limitations -============ +# Limitations Note that Percona XtraDB Cluster is not a 'scale-out' MySQL solution; reads and writes are channelled through a single service unit and synchronously replicated to other nodes in the cluster; reads/writes are as slow as the slowest node you have in your deployment. + +# Series Upgrade + +## Procedure + +1. Take a backup of all the databases + +```sh +juju run-action mysql/N backup +``` + * Get that backup off the mysql/N unit and somewhere safe. +```sh +juju scp -- -r mysql/N:/opt/backups/mysql /path/to/local/backup/dir +``` + +2. Pause all non-leader units and corresponding hacluster units. +The leader node will remain up for the time being. This is to ensure the leader has the latest sequence number and will be considered the most up to date by the cluster. +```sh +juju run-action hacluster/N pause +juju run-action percona-cluster/N pause +``` + +3. Prepare the leader node +```sh +juju upgrade-series prepare $MACHINE_NUMBER $SERIES +``` + +4. 
Administratively perform the upgrade. +* do-release-upgrade plus any further administratively required steps for an upgrade. + +5. Reboot + +6. Complete the series upgrade on the leader: +```sh +juju upgrade-series complete $MACHINE_NUMBER +``` + +7. Administratively validate the leader node database is up and running +* Connect to the database and check for expected data +* Review "SHOW GLOBAL STATUS;" + + +8. Upgrade the non-leader nodes one at a time following the same pattern summarized below: + +* juju upgrade-series prepare $MACHINE_NUMBER $SERIES +* Administratively Upgrade +* Reboot +* juju upgrade-series complete $MACHINE_NUMBER +* Validate + +9. Finalize the upgrade +Run action on leader node. +This action informs each node of the cluster the upgrade process is complete cluster wide. +This also updates mysql configuration with all peers in the cluster. +```sh +juju run-action mysql/N complete-cluster-series-upgrade +``` + +10. Set future instance to the new series and set the source origin +```sh +juju set-series percona-cluster xenial +juju config mysql source=distro +``` + +## Documentation +* https://www.percona.com/doc/percona-xtradb-cluster/LATEST/howtos/upgrade_guide.html +* https://www.percona.com/doc/percona-xtradb-cluster/5.6/upgrading_guide_55_56.html +* https://www.percona.com/blog/2014/09/01/galera-replication-how-to-recover-a-pxc-cluster/ diff --git a/actions.yaml b/actions.yaml index 0116cbd..73a6957 100644 --- a/actions.yaml +++ b/actions.yaml @@ -17,4 +17,10 @@ backup: type: boolean default: false description: Make an incremental database backup - +complete-cluster-series-upgrade: + description: | + Perform final operations post series upgrade. Inform all nodes in the + cluster the upgrade is complete cluster wide. Update configuration with all + peers for wsrep replication. + This action should be performed on the current leader. Note the leader may + have changed during the series upgrade process. 
diff --git a/actions/actions.py b/actions/actions.py index 8870bba..cd65c3f 100755 --- a/actions/actions.py +++ b/actions/actions.py @@ -12,6 +12,8 @@ from charmhelpers.core.hookenv import ( action_get, action_set, action_fail, + leader_set, + is_leader, ) from charmhelpers.core.host import ( @@ -47,6 +49,20 @@ def resume(args): config_changed() +def complete_cluster_series_upgrade(args): + """ Complete the series upgrade process + + After all nodes have been upgraded, this action is run to inform the whole + cluster the upgrade is done. Config files will be re-rendered with each + peer in the wsrep_cluster_address config. + """ + if is_leader(): + # Unset cluster_series_upgrading + leader_set(cluster_series_upgrading="") + leader_set(cluster_series_upgrade_leader="") + config_changed() + + def backup(args): basedir = (action_get("basedir")).lower() compress = action_get("compress") @@ -87,9 +103,11 @@ def backup(args): action_fail("innobackupex failed, you should log on to the unit" "and check the status of the database") + # A dictionary of all the defined actions to callables (which take # parsed arguments). 
-ACTIONS = {"pause": pause, "resume": resume, "backup": backup} +ACTIONS = {"pause": pause, "resume": resume, "backup": backup, + "complete-cluster-series-upgrade": complete_cluster_series_upgrade} def main(args): diff --git a/actions/complete-cluster-series-upgrade b/actions/complete-cluster-series-upgrade new file mode 120000 index 0000000..405a394 --- /dev/null +++ b/actions/complete-cluster-series-upgrade @@ -0,0 +1 @@ +actions.py \ No newline at end of file diff --git a/hooks/percona_hooks.py b/hooks/percona_hooks.py index 1b688bb..be4b4db 100755 --- a/hooks/percona_hooks.py +++ b/hooks/percona_hooks.py @@ -4,6 +4,7 @@ import sys import json import os import socket +import subprocess from charmhelpers.core.hookenv import ( Hooks, UnregisteredHookError, @@ -34,6 +35,7 @@ from charmhelpers.core.host import ( service_stop, file_hash, lsb_release, + mkdir, CompareHostReleases, ) from charmhelpers.core.templating import render @@ -69,6 +71,10 @@ from charmhelpers.contrib.hardening.harden import harden from charmhelpers.contrib.hardening.mysql.checks import run_mysql_checks from charmhelpers.contrib.openstack.utils import ( is_unit_paused_set, + is_unit_upgrading_set, + set_unit_upgrading, + clear_unit_upgrading, + clear_unit_paused, ) from charmhelpers.contrib.openstack.ha.utils import ( update_dns_ha_resource_params, @@ -113,6 +119,9 @@ from percona_utils import ( get_server_id, is_sufficient_peers, set_ready_on_peers, + pause_unit_helper, + resume_unit_helper, + check_for_socket, ) from charmhelpers.core.unitdata import kv @@ -129,6 +138,8 @@ RES_MONITOR_PARAMS = ('params user="sstuser" password="%(sstpass)s" ' INITIAL_CLIENT_UPDATE_KEY = 'initial_client_update_done' +MYSQL_SOCKET = "/var/run/mysqld/mysqld.sock" + def install_percona_xtradb_cluster(): '''Attempt PXC install based on seeding of passwords for users''' @@ -195,6 +206,7 @@ def render_config(hosts=None): 'performance_schema': config('performance-schema'), 'is_leader': is_leader(), 'server_id': 
get_server_id(), + 'series_upgrade': is_unit_upgrading_set(), } if config('prefer-ipv6'): @@ -305,12 +317,121 @@ def update_client_db_relations(): kvstore.flush() +@hooks.hook('pre-series-upgrade') +def prepare(): + # Use the pause feature to stop mysql during the duration of the upgrade + pause_unit_helper(register_configs()) + # Set this unit to series upgrading + set_unit_upgrading() + # The leader will "bootstrap" with no wrep peers + # Non-leaders will point only at the newly upgraded leader until the + # cluster series upgrade is completed. + # Set cluster_series_upgrading for the duration of the cluster series + # upgrade. This will be unset with the action + # complete-cluster-series-upgrade on the leader node. + hosts = [] + + if not leader_get('cluster_series_upgrade_leader'): + leader_set(cluster_series_upgrading=True) + leader_set( + cluster_series_upgrade_leader=get_relation_ip('cluster')) + else: + hosts = [leader_get('cluster_series_upgrade_leader')] + + # Render config + render_config(hosts) + + +@hooks.hook('post-series-upgrade') +def series_upgrade(): + + # Set this unit to series upgrading + set_unit_upgrading() + + # The leader will "bootstrap" with no wrep peers + # Non-leaders will point only at the newly upgraded leader until the + # cluster series upgrade is completed. + # Set cluster_series_upgrading for the duration of the cluster series + # upgrade. This will be unset with the action + # complete-cluster-series-upgrade on the leader node. 
+ if (leader_get('cluster_series_upgrade_leader') == + get_relation_ip('cluster')): + hosts = [] + else: + hosts = [leader_get('cluster_series_upgrade_leader')] + + # New series after series upgrade and reboot + _release = lsb_release()['DISTRIB_CODENAME'].lower() + + if _release == "xenial": + # Guarantee /var/run/mysqld exists + _dir = '/var/run/mysqld' + mkdir(_dir, owner="mysql", group="mysql", perms=0o755) + + # Install new versions of the percona packages + apt_install(determine_packages()) + service_stop("mysql") + + if _release == "bionic": + render_config(hosts) + + if _release == "xenial": + # Move the packaged version empty DB out of the way. + cmd = ["mv", "/var/lib/percona-xtradb-cluster", + "/var/lib/percona-xtradb-cluster.dpkg"] + subprocess.check_call(cmd) + + # Symlink the previous versions data to the new + cmd = ["ln", "-s", "/var/lib/mysql", "/var/lib/percona-xtradb-cluster"] + subprocess.check_call(cmd) + + # Start mysql temporarily with no wrep for the upgrade + cmd = ["mysqld"] + if _release == "bionic": + cmd.append("--skip-grant-tables") + cmd.append("--user=mysql") + cmd.append("--wsrep-provider=none") + log("Starting mysqld --wsrep-provider='none' and waiting ...") + proc = subprocess.Popen(cmd, stderr=subprocess.PIPE) + + # Wait for the mysql socket to exist + check_for_socket(MYSQL_SOCKET, exists=True) + + # Execute the upgrade process + log("Running mysql_upgrade") + cmd = ['mysql_upgrade'] + if _release == "xenial": + cmd.append('-p{}'.format(root_password())) + subprocess.check_call(cmd) + + # Terminate the temporary mysql + proc.terminate() + + # Wait for the mysql socket to be removed + check_for_socket(MYSQL_SOCKET, exists=False) + + # Clear states + clear_unit_paused() + clear_unit_upgrading() + + if _release == "xenial": + # Point at the correct my.cnf + cmd = ["update-alternatives", "--set", "my.cnf", + "/etc/mysql/percona-xtradb-cluster.cnf"] + subprocess.check_call(cmd) + + # Render config + render_config(hosts) + + 
resume_unit_helper(register_configs()) + + @hooks.hook('upgrade-charm') @harden() def upgrade(): if is_leader(): - if is_unit_paused_set(): + if is_unit_paused_set() or is_unit_upgrading_set(): log('Unit is paused, skiping upgrade', level=INFO) return @@ -350,17 +471,18 @@ def upgrade(): @harden() def config_changed(): + # if we are paused or upgrading, delay doing any config changed hooks. + # It is forced on the resume. + if is_unit_paused_set() or is_unit_upgrading_set(): + log("Unit is paused or upgrading. Skipping config_changed", "WARN") + return + # It is critical that the installation is attempted first before any # rendering of the configuration files occurs. # install_percona_xtradb_cluster has the code to decide if this is the # leader or if the leader is bootstrapped and therefore ready for install. install_percona_xtradb_cluster() - # if we are paused, delay doing any config changed hooks. It is forced on - # the resume. - if is_unit_paused_set(): - return - if config('prefer-ipv6'): assert_charm_supports_ipv6() @@ -368,20 +490,34 @@ def config_changed(): leader_bootstrapped = is_leader_bootstrapped() leader_ip = leader_get('leader-ip') - if is_leader(): + # Cluster upgrade adds some complication + cluster_series_upgrading = leader_get("cluster_series_upgrading") + if cluster_series_upgrading: + leader = (leader_get('cluster_series_upgrade_leader') == + get_relation_ip('cluster')) + leader_ip = leader_get('cluster_series_upgrade_leader') + else: + leader = is_leader() + leader_ip = leader_get('leader-ip') + + if leader: # If the cluster has not been fully bootstrapped once yet, use an empty # hosts list to avoid restarting the leader node's mysqld during # cluster buildup. # After, the cluster has bootstrapped at least one time, it is much # less likely to have restart collisions. It is then safe to use the # full hosts list and have the leader node's mysqld restart. 
- if not clustered_once(): + # Empty hosts if cluster_series_upgrading + if not clustered_once() or cluster_series_upgrading: hosts = [] log("Leader unit - bootstrap required=%s" % (not leader_bootstrapped), DEBUG) render_config_restart_on_changed(hosts, bootstrap=not leader_bootstrapped) - elif leader_bootstrapped and is_sufficient_peers(): + elif (leader_bootstrapped and + is_sufficient_peers() and not + cluster_series_upgrading): + # Skip if cluster_series_upgrading # Speed up cluster process by bootstrapping when the leader has # bootstrapped if we have expected number of peers if leader_ip not in hosts: diff --git a/hooks/percona_utils.py b/hooks/percona_utils.py index 3cc0052..2301655 100644 --- a/hooks/percona_utils.py +++ b/hooks/percona_utils.py @@ -7,6 +7,7 @@ import os import shutil import uuid from functools import partial +import time from charmhelpers.core.decorators import retry_on_exception from charmhelpers.core.host import ( @@ -63,6 +64,7 @@ from charmhelpers.contrib.openstack.utils import ( pause_unit, resume_unit, is_unit_paused_set, + is_unit_upgrading_set, ) # NOTE: python-mysqldb is installed by charmhelpers.contrib.database.mysql so @@ -664,6 +666,9 @@ def charm_check_func(): @returns (status, message) - tuple of strings if an issue """ + if is_unit_upgrading_set(): + # Avoid looping through attempting to determine cluster_in_sync + return ("blocked", "Unit upgrading.") @retry_on_exception(num_retries=10, base_delay=2, @@ -1095,3 +1100,26 @@ def set_ready_on_peers(): """ for relid in relation_ids('cluster'): relation_set(relation_id=relid, ready=True) + + +def check_for_socket(file_name, exists=True, sleep=10, attempts=12): + """Check that a socket file exists or does not exist. 
+ + :file_name: str File name + :exists: bool Check for file exists or not + :sleep: int Sleep time between attempts + :attempts: int Number of attempts before throwing an exception + :returns: None + :raises: Exception if max attempts is reached + """ + for i in range(attempts): + if os.path.exists(file_name) == exists: + return + else: + log("{} file is not yet in the correct state, retrying. " + "Check for exists={}".format(file_name, exists), + DEBUG) + time.sleep(sleep) + # If we get here throw an exception + raise Exception("Socket {} not found after {} attempts." + .format(file_name, attempts)) diff --git a/hooks/post-series-upgrade b/hooks/post-series-upgrade new file mode 120000 index 0000000..2af5208 --- /dev/null +++ b/hooks/post-series-upgrade @@ -0,0 +1 @@ +percona_hooks.py \ No newline at end of file diff --git a/hooks/pre-series-upgrade b/hooks/pre-series-upgrade new file mode 120000 index 0000000..2af5208 --- /dev/null +++ b/hooks/pre-series-upgrade @@ -0,0 +1 @@ +percona_hooks.py \ No newline at end of file diff --git a/templates/my.cnf b/templates/my.cnf index 9f4e243..26a89b7 100644 --- a/templates/my.cnf +++ b/templates/my.cnf @@ -115,3 +115,17 @@ wsrep_retry_autocommit = 100 {% endif %} !includedir /etc/mysql/conf.d/ + +{% if series_upgrade %} +# Required for compatibility with galera-2 +# Append socket.checksum=1 to other options if others are in wsrep_provider_options.
Eg.: "gmcast.listen_addr=tcp://127.0.0.1:15010; socket.checksum=1" +wsrep_provider_options="socket.checksum=1" +# Required for replication compatibility +log_bin_use_v1_row_events=1 +avoid_temporal_upgrade=ON # Available in 5.6.24 and up + +gtid_mode=0 +binlog_checksum=NONE +# Required under certain conditions +read_only=ON +{% endif %} diff --git a/unit_tests/test_actions.py b/unit_tests/test_actions.py index ffc8af6..d3aacb8 100644 --- a/unit_tests/test_actions.py +++ b/unit_tests/test_actions.py @@ -45,6 +45,28 @@ class ResumeTestCase(CharmTestCase): config_changed.assert_called_once_with() +class CompleteClusterSeriesUpgrade(CharmTestCase): + + def setUp(self): + super(CompleteClusterSeriesUpgrade, self).setUp( + actions, ["config_changed", "is_leader", "leader_set"]) + + def test_leader_complete_series_upgrade(self): + self.is_leader.return_value = True + + calls = [mock.call(cluster_series_upgrading=""), + mock.call(cluster_series_upgrade_leader="")] + actions.complete_cluster_series_upgrade([]) + self.leader_set.assert_has_calls(calls) + self.config_changed.assert_called_once_with() + + def test_non_leader_complete_series_upgrade(self): + self.is_leader.return_value = False + actions.complete_cluster_series_upgrade([]) + self.leader_set.assert_not_called() + self.config_changed.assert_called_once_with() + + class MainTestCase(CharmTestCase): def setUp(self): diff --git a/unit_tests/test_percona_hooks.py b/unit_tests/test_percona_hooks.py index 0971201..b06f2fd 100644 --- a/unit_tests/test_percona_hooks.py +++ b/unit_tests/test_percona_hooks.py @@ -310,21 +310,29 @@ class TestConfigChanged(CharmTestCase): 'get_cluster_hosts', 'leader_get', 'set_ready_on_peers', + 'is_unit_paused_set', + 'is_unit_upgrading_set', ] def setUp(self): CharmTestCase.setUp(self, hooks, self.TO_PATCH) self.config.side_effect = self.test_config.get self.is_unit_paused_set.return_value = False + self.is_unit_upgrading_set.return_value = False self.is_leader.return_value = False 
self.is_leader_bootstrapped.return_value = False self.is_bootstrapped.return_value = False self.clustered_once.return_value = False self.relation_ids.return_value = [] self.is_relation_made.return_value = False - self.leader_get.return_value = '10.10.10.10' self.get_cluster_hosts.return_value = [] + def _leader_get(key): + settings = {'leader-ip': '10.10.10.10', + 'cluster_series_upgrading': False} + return settings.get(key) + self.leader_get.side_effect = _leader_get + def test_config_changed_open_port(self): '''Ensure open_port is called with MySQL default port''' self.is_leader_bootstrapped.return_value = True diff --git a/unit_tests/test_percona_utils.py b/unit_tests/test_percona_utils.py index af55e85..006f387 100644 --- a/unit_tests/test_percona_utils.py +++ b/unit_tests/test_percona_utils.py @@ -377,6 +377,33 @@ class UtilsTests(CharmTestCase): self.config.side_effect = lambda key: _config.get(key) self.assertEqual(percona_utils.get_min_cluster_size(), 3) + @mock.patch("percona_utils.time") + @mock.patch("percona_utils.os") + def test_check_for_socket(self, _os, _time): + # Socket exists checking for exists + _os.path.exists.return_value = True + percona_utils.check_for_socket("filename", exists=True) + _time.sleep.assert_not_called() + + # Socket does not exist checking for exists + _os.path.exists.return_value = False + with self.assertRaises(Exception): + percona_utils.check_for_socket("filename", exists=True) + _time.sleep.assert_called_with(10) + + _time.reset_mock() + + # Socket does not exist checking for not exists + _os.path.exists.return_value = False + percona_utils.check_for_socket("filename", exists=False) + _time.sleep.assert_not_called() + + # Socket exists checking for not exists + _os.path.exists.return_value = True + with self.assertRaises(Exception): + percona_utils.check_for_socket("filename", exists=False) + _time.sleep.assert_called_with(10) + class UtilsTestsStatus(CharmTestCase):