Fix peer readiness detection
Sharing the admin password with peers over the 'cluster' relation was needed in case the leader died, so that the next leader would still have that information. This was implemented years ago, before the leader DB existed. It led to the race condition described in the mentioned bug and can now be safely removed. Validated by deploying several keystone and glance units, removing the keystone leader, then adding a glance unit and checking that this new unit receives its service credentials. Also added useful traces, made the linter happy, and increased the fernet token expiration to avoid spurious test failures. Closes-Bug: #1818113 Change-Id: I004903e50f51e190467d71691982de26518d7149
This commit is contained in:
parent
cd722cb6fe
commit
8b46dfd637
|
@ -225,7 +225,7 @@ SWIFT_CODENAMES = OrderedDict([
|
|||
('train',
|
||||
['2.22.0', '2.23.0']),
|
||||
('ussuri',
|
||||
['2.24.0']),
|
||||
['2.24.0', '2.25.0']),
|
||||
])
|
||||
|
||||
# >= Liberty version->codename mapping
|
||||
|
@ -2227,10 +2227,13 @@ def inform_peers_unit_state(state, relation_name='cluster'):
|
|||
if state not in UNIT_STATES:
|
||||
raise ValueError(
|
||||
"Setting invalid state {} for unit".format(state))
|
||||
this_unit = local_unit()
|
||||
for r_id in relation_ids(relation_name):
|
||||
juju_log('Telling peer behind relation {} that {} is {}'.format(
|
||||
r_id, this_unit, state), 'DEBUG')
|
||||
relation_set(relation_id=r_id,
|
||||
relation_settings={
|
||||
get_peer_key(local_unit()): state})
|
||||
get_peer_key(this_unit): state})
|
||||
|
||||
|
||||
def get_peers_unit_state(relation_name='cluster'):
|
||||
|
@ -2262,8 +2265,10 @@ def are_peers_ready(relation_name='cluster'):
|
|||
:returns: Whether all units are ready.
|
||||
:rtype: bool
|
||||
"""
|
||||
unit_states = get_peers_unit_state(relation_name)
|
||||
return all(v == UNIT_READY for v in unit_states.values())
|
||||
unit_states = get_peers_unit_state(relation_name).values()
|
||||
juju_log('{} peers are in the following states: {}'.format(
|
||||
relation_name, unit_states), 'DEBUG')
|
||||
return all(state == UNIT_READY for state in unit_states)
|
||||
|
||||
|
||||
def inform_peers_if_ready(check_unit_ready_func, relation_name='cluster'):
|
||||
|
@ -2346,7 +2351,9 @@ def get_api_application_status():
|
|||
app_state, msg = get_api_unit_status()
|
||||
if app_state == WORKLOAD_STATES.ACTIVE:
|
||||
if are_peers_ready():
|
||||
return WORKLOAD_STATES.ACTIVE, 'Application Ready'
|
||||
msg = 'Application Ready'
|
||||
else:
|
||||
return WORKLOAD_STATES.WAITING, 'Some units are not ready'
|
||||
app_state = WORKLOAD_STATES.WAITING
|
||||
msg = 'Some units are not ready'
|
||||
juju_log(msg, 'DEBUG')
|
||||
return app_state, msg
|
||||
|
|
|
@ -92,6 +92,7 @@ DEFAULT_PGS_PER_OSD_TARGET = 100
|
|||
DEFAULT_POOL_WEIGHT = 10.0
|
||||
LEGACY_PG_COUNT = 200
|
||||
DEFAULT_MINIMUM_PGS = 2
|
||||
AUTOSCALER_DEFAULT_PGS = 32
|
||||
|
||||
|
||||
class OsdPostUpgradeError(Exception):
|
||||
|
@ -399,16 +400,28 @@ class ReplicatedPool(Pool):
|
|||
|
||||
def create(self):
|
||||
if not pool_exists(self.service, self.name):
|
||||
nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
|
||||
# Create it
|
||||
cmd = ['ceph', '--id', self.service, 'osd', 'pool', 'create',
|
||||
self.name, str(self.pg_num)]
|
||||
if nautilus_or_later:
|
||||
cmd = [
|
||||
'ceph', '--id', self.service, 'osd', 'pool', 'create',
|
||||
'--pg-num-min={}'.format(
|
||||
min(AUTOSCALER_DEFAULT_PGS, self.pg_num)
|
||||
),
|
||||
self.name, str(self.pg_num)
|
||||
]
|
||||
else:
|
||||
cmd = [
|
||||
'ceph', '--id', self.service, 'osd', 'pool', 'create',
|
||||
self.name, str(self.pg_num)
|
||||
]
|
||||
|
||||
try:
|
||||
check_call(cmd)
|
||||
# Set the pool replica size
|
||||
update_pool(client=self.service,
|
||||
pool=self.name,
|
||||
settings={'size': str(self.replicas)})
|
||||
nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
|
||||
if nautilus_or_later:
|
||||
# Ensure we set the expected pool ratio
|
||||
update_pool(client=self.service,
|
||||
|
@ -419,13 +432,13 @@ class ReplicatedPool(Pool):
|
|||
pool=self.name,
|
||||
name=self.app_name)
|
||||
except CalledProcessError:
|
||||
log('Could not set app name for pool {}'.format(self.name, level=WARNING))
|
||||
log('Could not set app name for pool {}'.format(self.name), level=WARNING)
|
||||
if 'pg_autoscaler' in enabled_manager_modules():
|
||||
try:
|
||||
enable_pg_autoscale(self.service, self.name)
|
||||
except CalledProcessError as e:
|
||||
log('Could not configure auto scaling for pool {}: {}'.format(
|
||||
self.name, e, level=WARNING))
|
||||
self.name, e), level=WARNING)
|
||||
except CalledProcessError:
|
||||
raise
|
||||
|
||||
|
@ -466,10 +479,24 @@ class ErasurePool(Pool):
|
|||
k = int(erasure_profile['k'])
|
||||
m = int(erasure_profile['m'])
|
||||
pgs = self.get_pgs(k + m, self.percent_data)
|
||||
nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
|
||||
# Create it
|
||||
cmd = ['ceph', '--id', self.service, 'osd', 'pool', 'create',
|
||||
self.name, str(pgs), str(pgs),
|
||||
'erasure', self.erasure_code_profile]
|
||||
if nautilus_or_later:
|
||||
cmd = [
|
||||
'ceph', '--id', self.service, 'osd', 'pool', 'create',
|
||||
'--pg-num-min={}'.format(
|
||||
min(AUTOSCALER_DEFAULT_PGS, pgs)
|
||||
),
|
||||
self.name, str(pgs), str(pgs),
|
||||
'erasure', self.erasure_code_profile
|
||||
]
|
||||
else:
|
||||
cmd = [
|
||||
'ceph', '--id', self.service, 'osd', 'pool', 'create',
|
||||
self.name, str(pgs), str(pgs),
|
||||
'erasure', self.erasure_code_profile
|
||||
]
|
||||
|
||||
try:
|
||||
check_call(cmd)
|
||||
try:
|
||||
|
@ -477,8 +504,7 @@ class ErasurePool(Pool):
|
|||
pool=self.name,
|
||||
name=self.app_name)
|
||||
except CalledProcessError:
|
||||
log('Could not set app name for pool {}'.format(self.name, level=WARNING))
|
||||
nautilus_or_later = cmp_pkgrevno('ceph-common', '14.2.0') >= 0
|
||||
log('Could not set app name for pool {}'.format(self.name), level=WARNING)
|
||||
if nautilus_or_later:
|
||||
# Ensure we set the expected pool ratio
|
||||
update_pool(client=self.service,
|
||||
|
@ -489,7 +515,7 @@ class ErasurePool(Pool):
|
|||
enable_pg_autoscale(self.service, self.name)
|
||||
except CalledProcessError as e:
|
||||
log('Could not configure auto scaling for pool {}: {}'.format(
|
||||
self.name, e, level=WARNING))
|
||||
self.name, e), level=WARNING)
|
||||
except CalledProcessError:
|
||||
raise
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ def _snap_exec(commands):
|
|||
.format(SNAP_NO_LOCK_RETRY_COUNT))
|
||||
return_code = e.returncode
|
||||
log('Snap failed to acquire lock, trying again in {} seconds.'
|
||||
.format(SNAP_NO_LOCK_RETRY_DELAY, level='WARN'))
|
||||
.format(SNAP_NO_LOCK_RETRY_DELAY), level='WARN')
|
||||
sleep(SNAP_NO_LOCK_RETRY_DELAY)
|
||||
|
||||
return return_code
|
||||
|
|
|
@ -109,7 +109,6 @@ from keystone_utils import (
|
|||
setup_ipv6,
|
||||
send_notifications,
|
||||
is_db_initialised,
|
||||
filter_null,
|
||||
is_service_present,
|
||||
delete_service_entry,
|
||||
assess_status,
|
||||
|
@ -147,7 +146,6 @@ from charmhelpers.contrib.openstack.ha.utils import (
|
|||
|
||||
from charmhelpers.payload.execd import execd_preinstall
|
||||
from charmhelpers.contrib.peerstorage import (
|
||||
peer_retrieve_by_prefix,
|
||||
peer_echo,
|
||||
)
|
||||
from charmhelpers.contrib.openstack.ip import (
|
||||
|
@ -476,15 +474,6 @@ def identity_changed(relation_id=None, remote_unit=None):
|
|||
endpoints_checksum(endpoints[ep])
|
||||
)
|
||||
else:
|
||||
# Each unit needs to set the db information otherwise if the unit
|
||||
# with the info dies the settings die with it Bug# 1355848
|
||||
for rel_id in relation_ids('identity-service'):
|
||||
peerdb_settings = peer_retrieve_by_prefix(rel_id)
|
||||
# Ensure the null'd settings are unset in the relation.
|
||||
peerdb_settings = filter_null(peerdb_settings)
|
||||
if 'service_password' in peerdb_settings:
|
||||
relation_set(relation_id=rel_id, **peerdb_settings)
|
||||
|
||||
log('Deferring identity_changed() to service leader.')
|
||||
|
||||
if notifications_endpoints or notifications_checksums:
|
||||
|
|
|
@ -1520,6 +1520,12 @@ def ensure_initial_admin(config):
|
|||
'ldap' and config('ldap-readonly')):
|
||||
|
||||
admin_username = config('admin-user')
|
||||
|
||||
# NOTE(lourot): get_admin_passwd() will generate a new password if
|
||||
# the juju config or the leader DB doesn't contain already one. The
|
||||
# set_admin_passwd() callback will then store that password in the
|
||||
# leader DB. So if the leader dies, the new leader will still have
|
||||
# access to the password.
|
||||
if get_api_version() > 2:
|
||||
passwd = create_user_credentials(admin_username,
|
||||
get_admin_passwd,
|
||||
|
|
|
@ -27,7 +27,7 @@ applications:
|
|||
num_units: 3
|
||||
options:
|
||||
token-provider: 'fernet'
|
||||
token-expiration: 60
|
||||
token-expiration: 300
|
||||
to:
|
||||
- '1'
|
||||
- '2'
|
||||
|
|
|
@ -27,7 +27,7 @@ applications:
|
|||
num_units: 3
|
||||
options:
|
||||
openstack-origin: cloud:bionic-rocky
|
||||
token-expiration: 60
|
||||
token-expiration: 300
|
||||
to:
|
||||
- '1'
|
||||
- '2'
|
||||
|
|
|
@ -27,7 +27,7 @@ applications:
|
|||
num_units: 3
|
||||
options:
|
||||
openstack-origin: cloud:bionic-stein
|
||||
token-expiration: 60
|
||||
token-expiration: 300
|
||||
to:
|
||||
- '1'
|
||||
- '2'
|
||||
|
|
|
@ -27,7 +27,7 @@ applications:
|
|||
num_units: 3
|
||||
options:
|
||||
openstack-origin: cloud:bionic-train
|
||||
token-expiration: 60
|
||||
token-expiration: 300
|
||||
to:
|
||||
- '1'
|
||||
- '2'
|
||||
|
|
|
@ -26,7 +26,7 @@ applications:
|
|||
num_units: 3
|
||||
options:
|
||||
openstack-origin: cloud:bionic-ussuri
|
||||
token-expiration: 60
|
||||
token-expiration: 300
|
||||
to:
|
||||
- '1'
|
||||
- '2'
|
||||
|
|
|
@ -40,7 +40,7 @@ applications:
|
|||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
token-provider: 'fernet'
|
||||
token-expiration: 60
|
||||
token-expiration: 300
|
||||
to:
|
||||
- '3'
|
||||
- '4'
|
||||
|
|
|
@ -28,7 +28,7 @@ applications:
|
|||
options:
|
||||
openstack-origin: cloud:xenial-ocata
|
||||
token-provider: 'fernet'
|
||||
token-expiration: 60
|
||||
token-expiration: 300
|
||||
to:
|
||||
- '1'
|
||||
- '2'
|
||||
|
|
|
@ -28,7 +28,7 @@ applications:
|
|||
options:
|
||||
openstack-origin: cloud:xenial-pike
|
||||
token-provider: 'fernet'
|
||||
token-expiration: 60
|
||||
token-expiration: 300
|
||||
to:
|
||||
- '1'
|
||||
- '2'
|
||||
|
|
|
@ -28,7 +28,7 @@ applications:
|
|||
options:
|
||||
openstack-origin: cloud:xenial-queens
|
||||
token-provider: 'fernet'
|
||||
token-expiration: 60
|
||||
token-expiration: 300
|
||||
to:
|
||||
- '1'
|
||||
- '2'
|
||||
|
|
Loading…
Reference in New Issue