Enable pg_autoscaler for new Nautilus installations

This change also allows an admin to enable it, via a config
option, for existing installations that are upgraded to
Nautilus.

This change also enables the bionic-train bundle to allow testing
with Ceph Nautilus.

func-test-pr: https://github.com/openstack-charmers/zaza-openstack-tests/pull/52
Depends-On: https://github.com/juju/charm-helpers/pull/343
Change-Id: Ic532204aa1576cdbeb47de7410f421aa9e4bac42
This commit is contained in:
Chris MacNaughton 2019-07-04 15:53:22 +02:00
parent 62155d10a8
commit a3dfd8041b
8 changed files with 317 additions and 7 deletions

View File

@ -156,7 +156,7 @@ options:
kernel.threads-max: 2097152 }'
description: |
YAML-formatted associative array of sysctl key/value pairs to be set
persistently. By default we set pid_max, max_map_count and
threads-max to a high value to avoid problems with large numbers (>20)
of OSDs recovering. Very large clusters should set those values even
higher (e.g. max for kernel.pid_max is 4194303).
@ -196,7 +196,7 @@ options:
default: '1'
type: string
description: |
Recovery rate (in objects/s) below which we consider recovery
to be stalled.
nagios_raise_nodeepscrub:
default: True
@ -272,3 +272,13 @@ options:
least one pool (glance) loaded with a disproportionately high amount of
data/objects where other pools may remain empty. This can trigger HEALTH_WARN
if mon_pg_warn_max_object_skew is exceeded, but that is actually a false positive.
pg-autotune:
type: string
default: auto
description: |
The default value 'auto' enables the PG autoscaler module automatically for
new cluster installs on Ceph Nautilus, but leaves it disabled for clusters
upgraded to Nautilus. To enable the autoscaler for upgraded clusters, set
pg-autotune to 'true'. To disable it for new clusters, set pg-autotune to
'false'.
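
For reference, a minimal sketch of the pg-autotune semantics described above; this is illustrative only, not charm code, and the function and argument names (autoscaler_enabled, new_install, nautilus_or_later) are hypothetical:

def autoscaler_enabled(pg_autotune, new_install, nautilus_or_later):
    # The PG autoscaler module only exists from Ceph Nautilus (14.2.0).
    if not nautilus_or_later:
        return False
    if pg_autotune == 'true':
        return True    # explicit opt-in, including upgraded clusters
    if pg_autotune == 'false':
        return False   # explicit opt-out, including new clusters
    # 'auto': enable for fresh Nautilus installs, leave upgrades alone
    return new_install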

View File

@ -78,7 +78,9 @@ from charmhelpers.contrib.network.ip import (
from charmhelpers.core.sysctl import create as create_sysctl
from charmhelpers.core.templating import render
from charmhelpers.contrib.storage.linux.ceph import (
CephConfContext)
CephConfContext,
enable_pg_autoscale,
)
from utils import (
add_rbd_mirror_features,
assert_charm_supports_ipv6,
@ -88,7 +90,8 @@ from utils import (
get_rbd_features,
has_rbd_mirrors,
get_ceph_osd_releases,
execute_post_osd_upgrade_steps
execute_post_osd_upgrade_steps,
mgr_enable_module,
)
from charmhelpers.contrib.charmsupport import nrpe
@ -265,6 +268,18 @@ def config_changed():
# must be set. This block is invoked when the user is trying to
# get out of that scenario by enabling no-bootstrap.
bootstrap_source_relation_changed()
# This will only ensure that the autoscaler is enabled if the 'pg-autotune'
# option is explicitly set to 'true', not if it is 'auto' or 'false'
if (config('pg-autotune') == 'true' and
cmp_pkgrevno('ceph', '14.2.0') >= 0):
# The return value of mgr_enable_module tells us whether the module was
# already enabled, in which case we don't need to re-configure the
# existing pools
if mgr_enable_module('pg_autoscaler'):
ceph.monitor_key_set('admin', 'autotune', 'true')
for pool in ceph.list_pools():
enable_pg_autoscale('admin', pool)
# unconditionally verify that the fsid and monitor-secret are set now
# otherwise we exit until a leader does this.
if leader_get('fsid') is None or leader_get('monitor-secret') is None:
@ -430,6 +445,22 @@ def mon_relation():
if cmp_pkgrevno('ceph', '12.0.0') >= 0:
status_set('maintenance', 'Bootstrapping Ceph MGR')
ceph.bootstrap_manager()
if ceph.monitor_key_exists('admin', 'autotune'):
autotune = ceph.monitor_key_get('admin', 'autotune')
else:
autotune = config('pg-autotune')
if (cmp_pkgrevno('ceph', '14.2.0') >= 0 and
(autotune == 'true' or
autotune == 'auto')):
ceph.monitor_key_set('admin', 'autotune', 'true')
else:
ceph.monitor_key_set('admin', 'autotune', 'false')
if ceph.monitor_key_get('admin', 'autotune') == 'true':
try:
mgr_enable_module('pg_autoscaler')
except subprocess.CalledProcessError:
log("Failed to initialize autoscaler, it must be "
"initialized on the last monitor", level='info')
# If we can and want to
if is_leader() and config('customize-failure-domain'):
# But only if the environment supports it
@ -806,6 +837,14 @@ def upgrade_charm():
mon_relation_joined()
if is_relation_made("nrpe-external-master"):
update_nrpe_config()
if not ceph.monitor_key_exists('admin', 'autotune'):
autotune = config('pg-autotune')
if (cmp_pkgrevno('ceph', '14.2.0') >= 0 and
(autotune == 'true' or
autotune == 'auto')):
ceph.monitor_key_set('admin', 'autotune', 'true')
else:
ceph.monitor_key_set('admin', 'autotune', 'false')
# NOTE(jamespage):
# Reprocess broker requests to ensure that any cephx
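
A condensed sketch of how the 'autotune' monitor key is resolved in mon_relation() and upgrade_charm() above; illustrative only, and the helper name resolve_autotune is hypothetical:

def resolve_autotune(existing_key, config_value, nautilus_or_later):
    # An 'autotune' key already stored on the monitors wins; otherwise
    # fall back to the charm's pg-autotune config option.
    value = existing_key if existing_key is not None else config_value
    # Only 'true' or 'auto' on Nautilus (>= 14.2.0) records 'true'.
    if nautilus_or_later and value in ('true', 'auto'):
        return 'true'
    return 'false'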

View File

@ -301,6 +301,7 @@ class ReplicatedPool(Pool):
percent_data=10.0, app_name=None):
super(ReplicatedPool, self).__init__(service=service, name=name)
self.replicas = replicas
self.percent_data = percent_data
if pg_num:
# Since the number of placement groups were specified, ensure
# that there aren't too many created.
@ -324,12 +325,24 @@ class ReplicatedPool(Pool):
update_pool(client=self.service,
pool=self.name,
settings={'size': str(self.replicas)})
nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
if nautilus_or_later:
# Ensure we set the expected pool ratio
update_pool(client=self.service,
pool=self.name,
settings={'target_size_ratio': str(self.percent_data / 100.0)})
try:
set_app_name_for_pool(client=self.service,
pool=self.name,
name=self.app_name)
except CalledProcessError:
log('Could not set app name for pool {}'.format(self.name), level=WARNING)
if 'pg_autoscaler' in enabled_manager_modules():
try:
enable_pg_autoscale(self.service, self.name)
except CalledProcessError as e:
log('Could not configure auto scaling for pool {}: {}'.format(
self.name, e), level=WARNING)
except CalledProcessError:
raise
@ -382,6 +395,18 @@ class ErasurePool(Pool):
name=self.app_name)
except CalledProcessError:
log('Could not set app name for pool {}'.format(self.name), level=WARNING)
nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
if nautilus_or_later:
# Ensure we set the expected pool ratio
update_pool(client=self.service,
pool=self.name,
settings={'target_size_ratio': str(self.percent_data / 100.0)})
if 'pg_autoscaler' in enabled_manager_modules():
try:
enable_pg_autoscale(self.service, self.name)
except CalledProcessError as e:
log('Could not configure auto scaling for pool {}: {}'.format(
self.name, e), level=WARNING)
except CalledProcessError:
raise
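
The target_size_ratio written by both pool classes above is simply percent_data expressed as a fraction; a quick plain-Python illustration (not charm code):

percent_data = 10.0                      # ReplicatedPool's default in this diff
target_size_ratio = percent_data / 100.0
print(target_size_ratio)                 # 0.1: the autoscaler assumes this pool
                                         # holds ~10% of the cluster's data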
@ -389,6 +414,34 @@ class ErasurePool(Pool):
Returns json formatted output"""
def enabled_manager_modules():
"""Return a list of enabled manager modules.
:rtype: List[str]
"""
cmd = ['ceph', 'mgr', 'module', 'ls']
try:
modules = check_output(cmd)
if six.PY3:
modules = modules.decode('utf-8')
except CalledProcessError as e:
log("Failed to list ceph modules: {}".format(e), WARNING)
return []
modules = json.loads(modules)
return modules['enabled_modules']
def enable_pg_autoscale(service, pool_name):
"""
Enable Ceph's PG autoscaler for the specified pool.
:param service: six.string_types. The Ceph user name to run the command under
:param pool_name: six.string_types. The name of the pool to enable autoscaling on
:raise: CalledProcessError if the command fails
"""
check_call(['ceph', '--id', service, 'osd', 'pool', 'set', pool_name, 'pg_autoscale_mode', 'on'])
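
A hedged usage sketch of the two helpers added above; the 'admin' user and 'glance' pool name are illustrative:

from charmhelpers.contrib.storage.linux.ceph import (
    enabled_manager_modules,
    enable_pg_autoscale,
)

# Only touch per-pool autoscaling when the mgr module is actually enabled.
if 'pg_autoscaler' in enabled_manager_modules():
    # Equivalent to: ceph --id admin osd pool set glance pg_autoscale_mode on
    enable_pg_autoscale('admin', 'glance')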
def get_mon_map(service):
"""
Returns the current monitor map.

View File

@ -43,6 +43,7 @@ from charmhelpers.contrib.network.ip import (
get_address_in_network,
get_ipv6_addr
)
from charmhelpers.contrib.storage.linux import ceph
try:
import dns.resolver
@ -69,6 +70,20 @@ def enable_pocket(pocket):
sources.write(line)
def mgr_enable_module(module):
"""Enable a Ceph Manager Module.
:param module: The module name to enable
:type module: str
:raises: subprocess.CalledProcessError
"""
if module not in ceph.enabled_manager_modules():
subprocess.check_call(['ceph', 'mgr', 'module', 'enable', module])
return True
return False
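
mgr_enable_module() is deliberately idempotent, which is what lets config_changed() use its return value to decide whether existing pools still need configuring; a small illustrative sketch:

from utils import mgr_enable_module

# First call runs 'ceph mgr module enable pg_autoscaler' and returns True.
newly_enabled = mgr_enable_module('pg_autoscaler')
# A second call sees the module already listed as enabled and returns False
# without shelling out again.
already_enabled = not mgr_enable_module('pg_autoscaler')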
@cached
def get_unit_hostname():
return socket.gethostname()

View File

@ -0,0 +1,103 @@
series: bionic
applications:
ceph-osd:
charm: cs:~openstack-charmers-next/ceph-osd
num_units: 3
storage:
osd-devices: 'cinder,10G'
options:
osd-devices: '/dev/test-non-existent'
source: cloud:bionic-train/proposed
ceph-mon:
charm: ceph-mon
series: bionic
num_units: 3
options:
monitor-count: '3'
source: cloud:bionic-train/proposed
percona-cluster:
charm: cs:~openstack-charmers-next/percona-cluster
num_units: 1
options:
dataset-size: 25%
max-connections: 1000
source: cloud:bionic-train/proposed
rabbitmq-server:
charm: cs:~openstack-charmers-next/rabbitmq-server
num_units: 1
options:
source: cloud:bionic-train/proposed
keystone:
expose: True
charm: cs:~openstack-charmers-next/keystone
num_units: 1
options:
openstack-origin: cloud:bionic-train/proposed
nova-compute:
charm: cs:~openstack-charmers-next/nova-compute
num_units: 1
options:
openstack-origin: cloud:bionic-train/proposed
glance:
expose: True
charm: cs:~openstack-charmers-next/glance
num_units: 1
options:
openstack-origin: cloud:bionic-train/proposed
cinder:
expose: True
charm: cs:~openstack-charmers-next/cinder
num_units: 1
options:
block-device: 'None'
glance-api-version: '2'
openstack-origin: cloud:bionic-train/proposed
cinder-ceph:
charm: cs:~openstack-charmers-next/cinder-ceph
nova-cloud-controller:
expose: True
charm: cs:~openstack-charmers-next/nova-cloud-controller
num_units: 1
options:
openstack-origin: cloud:bionic-train/proposed
relations:
- - nova-compute:amqp
- rabbitmq-server:amqp
- - nova-compute:image-service
- glance:image-service
- - nova-compute:ceph
- ceph-mon:client
- - keystone:shared-db
- percona-cluster:shared-db
- - glance:shared-db
- percona-cluster:shared-db
- - glance:identity-service
- keystone:identity-service
- - glance:amqp
- rabbitmq-server:amqp
- - glance:ceph
- ceph-mon:client
- - cinder:shared-db
- percona-cluster:shared-db
- - cinder:identity-service
- keystone:identity-service
- - cinder:amqp
- rabbitmq-server:amqp
- - cinder:image-service
- glance:image-service
- - cinder-ceph:storage-backend
- cinder:storage-backend
- - cinder-ceph:ceph
- ceph-mon:client
- - ceph-osd:mon
- ceph-mon:osd
- - nova-cloud-controller:shared-db
- percona-cluster:shared-db
- - nova-cloud-controller:identity-service
- keystone:identity-service
- - nova-cloud-controller:amqp
- rabbitmq-server:amqp
- - nova-cloud-controller:cloud-compute
- nova-compute:cloud-compute
- - nova-cloud-controller:image-service
- glance:image-service

View File

@ -1,5 +1,6 @@
charm_name: ceph-mon
gate_bundles:
- bionic-train
- bionic-stein
- bionic-rocky
- bionic-queens

View File

@ -19,7 +19,6 @@ with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec:
lambda *args, **kwargs: f(*args, **kwargs))
import ceph_hooks
TO_PATCH = [
'config',
'is_leader',
@ -56,9 +55,10 @@ CHARM_CONFIG = {'config-flags': '',
'disable-pg-max-object-skew': False}
class CephHooksTestCase(unittest.TestCase):
class CephHooksTestCase(test_utils.CharmTestCase):
def setUp(self):
super(CephHooksTestCase, self).setUp()
super(CephHooksTestCase, self).setUp(ceph_hooks, TO_PATCH)
self.config.side_effect = self.test_config.get
@patch.object(ceph_hooks, 'get_rbd_features', return_value=None)
@patch.object(ceph_hooks, 'get_public_addr', lambda *args: "10.0.0.1")
@ -287,6 +287,80 @@ class CephHooksTestCase(unittest.TestCase):
relation_settings={
'nonce': 'FAKE-UUID'})
@patch.object(ceph_hooks.ceph, 'list_pools')
@patch.object(ceph_hooks, 'mgr_enable_module')
@patch.object(ceph_hooks, 'emit_cephconf')
@patch.object(ceph_hooks, 'create_sysctl')
@patch.object(ceph_hooks, 'check_for_upgrade')
@patch.object(ceph_hooks, 'get_mon_hosts')
@patch.object(ceph_hooks, 'bootstrap_source_relation_changed')
@patch.object(ceph_hooks, 'relations_of_type')
def test_config_changed_no_autotune(self,
relations_of_type,
bootstrap_source_rel_changed,
get_mon_hosts,
check_for_upgrade,
create_sysctl,
emit_ceph_conf,
mgr_enable_module,
list_pools):
relations_of_type.return_value = False
self.test_config.set('pg-autotune', 'false')
ceph_hooks.config_changed()
mgr_enable_module.assert_not_called()
@patch.object(ceph_hooks.ceph, 'monitor_key_set')
@patch.object(ceph_hooks.ceph, 'list_pools')
@patch.object(ceph_hooks, 'mgr_enable_module')
@patch.object(ceph_hooks, 'emit_cephconf')
@patch.object(ceph_hooks, 'create_sysctl')
@patch.object(ceph_hooks, 'check_for_upgrade')
@patch.object(ceph_hooks, 'get_mon_hosts')
@patch.object(ceph_hooks, 'bootstrap_source_relation_changed')
@patch.object(ceph_hooks, 'relations_of_type')
@patch.object(ceph_hooks, 'cmp_pkgrevno')
def test_config_changed_with_autotune(self,
cmp_pkgrevno,
relations_of_type,
bootstrap_source_rel_changed,
get_mon_hosts,
check_for_upgrade,
create_sysctl,
emit_ceph_conf,
mgr_enable_module,
list_pools, monitor_key_set):
relations_of_type.return_value = False
cmp_pkgrevno.return_value = 1
self.test_config.set('pg-autotune', 'true')
ceph_hooks.config_changed()
mgr_enable_module.assert_called_once_with('pg_autoscaler')
monitor_key_set.assert_called_once_with('admin', 'autotune', 'true')
@patch.object(ceph_hooks.ceph, 'list_pools')
@patch.object(ceph_hooks, 'mgr_enable_module')
@patch.object(ceph_hooks, 'emit_cephconf')
@patch.object(ceph_hooks, 'create_sysctl')
@patch.object(ceph_hooks, 'check_for_upgrade')
@patch.object(ceph_hooks, 'get_mon_hosts')
@patch.object(ceph_hooks, 'bootstrap_source_relation_changed')
@patch.object(ceph_hooks, 'relations_of_type')
@patch.object(ceph_hooks, 'cmp_pkgrevno')
def test_config_changed_with_default_autotune(self,
cmp_pkgrevno,
relations_of_type,
bootstrap_source_rel_changed,
get_mon_hosts,
check_for_upgrade,
create_sysctl,
emit_ceph_conf,
mgr_enable_module,
list_pools):
relations_of_type.return_value = False
cmp_pkgrevno.return_value = 1
self.test_config.set('pg-autotune', 'auto')
ceph_hooks.config_changed()
mgr_enable_module.assert_not_called()
class RelatedUnitsTestCase(unittest.TestCase):

View File

@ -39,6 +39,21 @@ class CephUtilsTestCase(test_utils.CharmTestCase):
_relation_ids.assert_called_once_with('rbd-mirror')
_related_units.assert_called_once_with('arelid')
@mock.patch.object(utils.ceph, 'enabled_manager_modules')
@mock.patch.object(utils.subprocess, 'check_call')
def test_mgr_enable_module(self, _call, _enabled_modules):
_enabled_modules.return_value = []
utils.mgr_enable_module('test-module')
_call.assert_called_once_with(
['ceph', 'mgr', 'module', 'enable', 'test-module'])
@mock.patch.object(utils.ceph, 'enabled_manager_modules')
@mock.patch.object(utils.subprocess, 'check_call')
def test_mgr_enable_module_again(self, _call, _enabled_modules):
_enabled_modules.return_value = ['test-module']
utils.mgr_enable_module('test-module')
_call.assert_not_called()
@mock.patch.object(utils.subprocess, 'check_output')
def test_get_default_rbd_features(self, _check_output):
_check_output.return_value = json.dumps(