Do relation consistency sweep on leader change

The current charm design is to perform a sweep of all units
related on the identity-service interface to ensure that
they have all the correct setting values applied. If the
leader unit is deleted and a new one is elected this will
not happen until some event e.g. config-changed occurs. This
can result in remote units malfunctioning since they think they
are not configured. We resolve this by always doing a sweep when
the leader-elected hook fires.

Also fixes infinite loop edge case when ssl-cert-master switches
as a result of leader switch.

Change-Id: Icd68cc70d81d7d518c918e831056f686dbc7db1e
Closes-Bug: 1721269
This commit is contained in:
Edward Hope-Morley 2017-10-04 21:39:14 +01:00
parent 4dc04f9e2f
commit 68a0c87235
3 changed files with 76 additions and 32 deletions

View File

@ -145,6 +145,7 @@ from charmhelpers.payload.execd import execd_preinstall
from charmhelpers.contrib.peerstorage import (
peer_retrieve_by_prefix,
peer_echo,
relation_get as relation_get_and_migrate,
)
from charmhelpers.contrib.openstack.ip import (
ADMIN,
@ -585,7 +586,9 @@ def send_ssl_sync_request():
prev = bin(_prev)
if rid and prev ^ ssl_config:
clear_ssl_synced_units()
if is_leader():
clear_ssl_synced_units()
log("Setting %s=%s" % (key, bin(ssl_config)), level=DEBUG)
relation_set(relation_id=rid, relation_settings=settings)
@ -623,8 +626,12 @@ def cluster_changed():
peer_interface='cluster',
ensure_local_user=True)
# NOTE(jamespage) re-echo passwords for peer storage
echo_whitelist = ['_passwd', 'identity-service:', 'ssl-cert-master',
echo_whitelist = ['_passwd', 'identity-service:',
'db-initialised', 'ssl-cert-available-updates']
# Don't echo if leader since a re-election may be in progress.
if not is_leader():
echo_whitelist.append('ssl-cert-master')
log("Peer echo whitelist: %s" % (echo_whitelist), level=DEBUG)
peer_echo(includes=echo_whitelist, force=True)
@ -632,14 +639,17 @@ def cluster_changed():
initialise_pki()
# Figure out if we need to mandate a sync
units = get_ssl_sync_request_units()
synced_units = relation_get(attribute='ssl-synced-units',
unit=local_unit())
diff = None
if synced_units:
synced_units = json.loads(synced_units)
diff = set(units).symmetric_difference(set(synced_units))
if is_leader():
# Figure out if we need to mandate a sync
units = get_ssl_sync_request_units()
synced_units = relation_get_and_migrate(attribute='ssl-synced-units',
unit=local_unit())
diff = None
if synced_units:
synced_units = json.loads(synced_units)
diff = set(units).symmetric_difference(set(synced_units))
else:
units = None
if units and (not synced_units or diff):
log("New peers joined and need syncing - %s" %
@ -648,7 +658,7 @@ def cluster_changed():
else:
update_all_identity_relation_units()
if not is_elected_leader(CLUSTER_RES) and is_ssl_cert_master():
if not is_leader() and is_ssl_cert_master():
# Force and sync and trigger a sync master re-election since we are not
# leader anymore.
force_ssl_sync()
@ -663,6 +673,8 @@ def leader_elected():
# to ensure that the cron jobs are active on this unit.
CONFIGS.write(TOKEN_FLUSH_CRON_FILE)
update_all_identity_relation_units()
@hooks.hook('leader-settings-changed')
def leader_settings_changed():

View File

@ -139,6 +139,7 @@ from charmhelpers.contrib.peerstorage import (
peer_store_and_set,
peer_store,
peer_retrieve,
relation_set as relation_set_and_migrate_to_leader,
)
from charmhelpers.core.templating import render
@ -1524,14 +1525,20 @@ def is_ssl_cert_master(votes=None):
return True
# Early in the process and juju leader
if not votes:
if not set_votes:
log("This unit is the juju leader and there are no votes yet, "
"becoming the ssl-cert-master",
level=DEBUG)
return True
elif (len(set_votes) == 1 and set_votes != set([local_unit()]) and
is_leader()):
log("This unit is the juju leader but not yet ssl-cert-master "
"(current votes = {})".format(set_votes), level=DEBUG)
return False
# Should never reach here
log("Could not determine the ssl-cert-master. Missing edge case.",
log("Could not determine the ssl-cert-master. Missing edge case. "
"(current votes = {})".format(set_votes),
level=ERROR)
return False
@ -1717,8 +1724,12 @@ def synchronize_ca(fatal=False):
if not os.path.isdir(SYNC_FLAGS_DIR):
mkdir(SYNC_FLAGS_DIR, SSH_USER, KEYSTONE_USER, 0o775)
restart_trigger = None
for action, services in peer_service_actions.iteritems():
create_peer_service_actions(action, set(services))
services = set(services)
if services:
restart_trigger = str(uuid.uuid4())
create_peer_service_actions(action, services)
create_peer_actions(peer_actions)
@ -1736,13 +1747,23 @@ def synchronize_ca(fatal=False):
if synced_units:
# Format here needs to match that used when peers request sync
synced_units = [u.replace('/', '-') for u in synced_units]
cluster_rel_settings['ssl-synced-units'] = \
ssl_synced_units = \
json.dumps(synced_units)
# NOTE(hopem): we pull this onto the leader settings to avoid
# unnecessary cluster relation noise. This is possible because the
# setting is only needed by the cert master.
if 'ssl-synced-units' not in leader_get():
rid = relation_ids('cluster')[0]
relation_set_and_migrate_to_leader(relation_id=rid,
**{'ssl-synced-units':
ssl_synced_units})
else:
leader_set({'ssl-synced-units': ssl_synced_units})
trigger = str(uuid.uuid4())
log("Sending restart-services-trigger=%s to all peers" % (trigger),
level=DEBUG)
cluster_rel_settings['restart-services-trigger'] = trigger
if restart_trigger:
log("Sending restart-services-trigger=%s to all peers" %
(restart_trigger), level=DEBUG)
cluster_rel_settings['restart-services-trigger'] = restart_trigger
log("Sync complete", level=DEBUG)
return cluster_rel_settings
@ -1756,8 +1777,11 @@ def clear_ssl_synced_units():
"""
log("Clearing ssl sync units", level=DEBUG)
for rid in relation_ids('cluster'):
relation_set(relation_id=rid,
relation_settings={'ssl-synced-units': None})
if 'ssl-synced-units' not in leader_get():
relation_set_and_migrate_to_leader(relation_id=rid,
**{'ssl-synced-units': None})
else:
leader_set({'ssl-synced-units': None})
def update_hash_from_path(hash, path, recurse_depth=10):
@ -1823,13 +1847,17 @@ def synchronize_ca_if_changed(force=False, fatal=False):
# If we are the sync master but not leader, ensure we have
# relinquished master status.
if not is_elected_leader(CLUSTER_RES):
log("Re-electing ssl cert master.", level=INFO)
peer_settings['ssl-cert-master'] = 'unknown'
cluster_rids = relation_ids('cluster')
if cluster_rids:
master = relation_get('ssl-cert-master',
rid=cluster_rids[0],
unit=local_unit())
if not is_leader() and master == local_unit():
log("Re-electing ssl cert master.", level=INFO)
peer_settings['ssl-cert-master'] = 'unknown'
if peer_settings:
for rid in relation_ids('cluster'):
relation_set(relation_id=rid,
if peer_settings:
relation_set(relation_id=cluster_rids[0],
relation_settings=peer_settings)
return ret

View File

@ -732,6 +732,7 @@ class KeystoneRelationTests(CharmTestCase):
hooks.cluster_joined(rid='foo:1', ssl_sync_request=False)
self.assertFalse(mock_send_ssl_sync_request.called)
@patch.object(hooks, 'relation_get_and_migrate')
@patch.object(hooks, 'initialise_pki')
@patch.object(hooks, 'update_all_identity_relation_units')
@patch.object(hooks, 'get_ssl_sync_request_units')
@ -753,16 +754,18 @@ class KeystoneRelationTests(CharmTestCase):
mock_is_ssl_cert_master,
mock_get_ssl_sync_request_units,
mock_update_all_identity_relation_units,
mock_initialise_pki):
mock_initialise_pki,
mock_relation_get_and_migrate):
relation_settings = {'foo_passwd': '123',
'identity-service:16_foo': 'bar'}
mock_relation_get_and_migrate.return_value = None
mock_is_ssl_cert_master.return_value = False
mock_peer_units.return_value = ['unit/0']
mock_ensure_ssl_cert_master.return_value = False
mock_relation_ids.return_value = []
self.is_elected_leader.return_value = False
self.is_leader.return_value = False
def fake_rel_get(attribute=None, *args, **kwargs):
if not attribute:
@ -775,8 +778,8 @@ class KeystoneRelationTests(CharmTestCase):
mock_config.return_value = None
hooks.cluster_changed()
whitelist = ['_passwd', 'identity-service:', 'ssl-cert-master',
'db-initialised', 'ssl-cert-available-updates']
whitelist = ['_passwd', 'identity-service:', 'db-initialised',
'ssl-cert-available-updates', 'ssl-cert-master']
self.peer_echo.assert_called_with(force=True, includes=whitelist)
ssh_authorized_peers.assert_called_with(
user=self.ssh_user, group='juju_keystone',
@ -784,8 +787,9 @@ class KeystoneRelationTests(CharmTestCase):
self.assertFalse(mock_synchronize_ca.called)
self.assertTrue(configs.write_all.called)
@patch.object(hooks, 'update_all_identity_relation_units')
@patch.object(hooks.CONFIGS, 'write')
def test_leader_elected(self, mock_write):
def test_leader_elected(self, mock_write, mock_update):
hooks.leader_elected()
mock_write.assert_has_calls([call(utils.TOKEN_FLUSH_CRON_FILE)])