From 0697559b51666197eecc0985611ee563c9a70c6d Mon Sep 17 00:00:00 2001 From: Trent Lloyd Date: Tue, 9 Apr 2019 14:30:34 +0800 Subject: [PATCH] wsrep_slave_threads: default to 48 on bionic This improves performance significantly for environments constrained by calls to sync() such as HDDs or lower-end SSDs (or just very busy environments running many queries). By default the queries from other nodes are only processed with 1 thread, which means they will always run slower than on the master and any long-running query will hold up all other queries behind it. Additionally, when multiple queries commit at once the server can combine them together into a single on-disk sync ('group commit') which is not possible otherwise. This optimisation appears to only occur on Bionic (Percona 5.7) and not Xenial (Percona 5.6). On Bionic, default to 48 threads which experimentally is a good number for OpenStack environments without being too crazy high. Galera ensures that queries that are dependent on each other are still executed sequentially and generally it is not expected to cause replication inconsistencies. However, Percona Cluster 5.6 on Xenial appears to have a bug handling foreign key constraints that causes them to be violated (LP #1823850). The result is that the slave node crashes out and has to do a full SST to recover. The same issue is not present on the master. Thus we leave the default wsrep_slave_threads=1 on Xenial to avoid this issue for now, particularly since Xenial does not appear to be able to use Group Commit to optimise the number of sync requests generated by the queries - so this option does not really improve performance there anyway. 
Partial-Bug: #1822903 Change-Id: Ic9cdd6562f30a3e52aa3d26fea53ba7c2bbdc771 --- config.yaml | 8 ++--- hooks/percona_hooks.py | 4 +++ unit_tests/test_percona_hooks.py | 53 ++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/config.yaml b/config.yaml index 1eebbd9..647cf0d 100644 --- a/config.yaml +++ b/config.yaml @@ -345,10 +345,10 @@ options: type: int default: description: | - Specifies the number of threads that can apply replication transactions in - parallel. Galera supports true parallel replication that applies - transactions in parallel only when it is safe to do so. Unset leaves the - default value of 1. + Specifies the number of threads that can apply replication transactions + in parallel. Galera supports true parallel replication that applies + transactions in parallel only when it is safe to do so. When unset + defaults to 48 for >= Bionic or 1 for <= Xenial. gcs-fc-limit: type: int default: diff --git a/hooks/percona_hooks.py b/hooks/percona_hooks.py index 54b1b6a..cbca469 100755 --- a/hooks/percona_hooks.py +++ b/hooks/percona_hooks.py @@ -239,12 +239,16 @@ def render_config(hosts=None): # only set it for PXC 5.6. 
context['myisam_recover'] = 'BACKUP' context['wsrep_provider'] = '/usr/lib/libgalera_smm.so' + if 'wsrep_slave_threads' not in context: + context['wsrep_slave_threads'] = 1 elif CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'bionic': context['wsrep_provider'] = '/usr/lib/galera3/libgalera_smm.so' context['default_storage_engine'] = 'InnoDB' context['wsrep_log_conflicts'] = True context['innodb_autoinc_lock_mode'] = '2' context['pxc_strict_mode'] = config('pxc-strict-mode') + if 'wsrep_slave_threads' not in context: + context['wsrep_slave_threads'] = 48 if config('databases-to-replicate'): context['databases_to_replicate'] = get_databases_to_replicate() diff --git a/unit_tests/test_percona_hooks.py b/unit_tests/test_percona_hooks.py index 5e9adf8..6fc93e7 100644 --- a/unit_tests/test_percona_hooks.py +++ b/unit_tests/test_percona_hooks.py @@ -724,6 +724,58 @@ class TestConfigs(CharmTestCase): default_config[k] = None return default_config + @mock.patch.object(os, 'makedirs') + @mock.patch.object(hooks, 'get_cluster_host_ip') + @mock.patch.object(hooks, 'get_wsrep_provider_options') + @mock.patch.object(PerconaClusterHelper, 'parse_config') + @mock.patch.object(hooks, 'render') + @mock.patch.object(hooks, 'sst_password') + @mock.patch.object(hooks, 'lsb_release') + def test_render_config_defaults_xenial(self, + lsb_release, + sst_password, + render, + parse_config, + get_wsrep_provider_options, + get_cluster_host_ip, + makedirs): + parse_config.return_value = {'key_buffer': '32M'} + get_cluster_host_ip.return_value = '10.1.1.1' + get_wsrep_provider_options.return_value = None + sst_password.return_value = 'sstpassword' + lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial'} + context = { + 'wsrep_slave_threads': 1, + 'server-id': hooks.get_server_id(), + 'is_leader': hooks.is_leader(), + 'series_upgrade': hooks.is_unit_upgrading_set(), + 'private_address': '10.1.1.1', + 'cluster_hosts': '', + 'enable_binlogs': self.default_config['enable-binlogs'], + 
'sst_password': 'sstpassword', + 'myisam_recover': 'BACKUP', + 'sst_method': self.default_config['sst-method'], + 'server_id': hooks.get_server_id(), + 'binlogs_max_size': self.default_config['binlogs-max-size'], + 'key_buffer': '32M', + 'performance_schema': self.default_config['performance-schema'], + 'binlogs_path': self.default_config['binlogs-path'], + 'cluster_name': 'juju_cluster', + 'binlogs_expire_days': self.default_config['binlogs-expire-days'], + 'ipv6': False, + 'innodb_file_per_table': + self.default_config['innodb-file-per-table'], + 'table_open_cache': self.default_config['table-open-cache'], + 'wsrep_provider': '/usr/lib/libgalera_smm.so', + } + + hooks.render_config() + hooks.render.assert_called_once_with( + 'mysqld.cnf', + '/etc/mysql/percona-xtradb-cluster.conf.d/mysqld.cnf', + context, + perms=0o444) + @mock.patch.object(os, 'makedirs') @mock.patch.object(hooks, 'get_cluster_host_ip') @mock.patch.object(hooks, 'get_wsrep_provider_options') @@ -745,6 +797,7 @@ class TestConfigs(CharmTestCase): sst_password.return_value = 'sstpassword' lsb_release.return_value = {'DISTRIB_CODENAME': 'bionic'} context = { + 'wsrep_slave_threads': 48, 'server_id': hooks.get_server_id(), 'server-id': hooks.get_server_id(), 'is_leader': hooks.is_leader(),