Allow for temporary loss of the DB

MySQL topology changes, pause/resume cycles, or even rolling restarts
can unnecessarily put the vault charm into an error state.

* Make the vault charm more robust to temporary MySQL unavailability.
* Make the workload status indicate to the end user when this occurs.

Closes-Bug: #1886083
Change-Id: I57ce8b7d3f778fb87ab01170db1b3770ad84badf
This commit is contained in:
David Ames 2020-05-22 14:07:34 -07:00
parent 3f94a10cbf
commit 25ac2cb0de
3 changed files with 54 additions and 13 deletions

View File

@ -680,11 +680,7 @@ def _assess_status():
application_version_set(health.get('version'))
else:
application_version_set('Unknown')
status_set('blocked', 'Vault health check failed')
return
if not service_running('vault'):
status_set('blocked', 'Vault service not running')
status_set('blocked', 'Unknown vault version')
return
if not health['initialized']:
@ -695,6 +691,10 @@ def _assess_status():
status_set('blocked', 'Unit is sealed')
return
if not client_approle_authorized():
status_set('blocked', 'Vault cannot authorize approle')
return
mlock_disabled = is_container() or config('disable-mlock')
status_set(
@ -707,6 +707,19 @@ def _assess_status():
)
def client_approle_authorized():
    """Report whether a local Vault client can currently be obtained.

    Calls ``vault.get_local_client()`` and converts the two failures caught
    below into a ``False`` result instead of letting them propagate, so
    callers can skip their work or report a blocked status.

    :returns: True if ``vault.get_local_client()`` succeeded, False if it
              raised ``InternalServerError`` or ``VaultNotReady``.
    :rtype: bool
    """
    try:
        vault.get_local_client()
    except (vault.hvac.exceptions.InternalServerError,
            vault.VaultNotReady) as exc:
        # Name the exception that was actually raised: either of the two
        # caught types can land here, so a hard-coded "InternalServerError"
        # prefix would be misleading when VaultNotReady was the cause.
        log("{}: Unable to authorize approle. "
            "This may indicate failure to communicate with the database"
            .format(type(exc).__name__),
            level="WARNING")
        # Keep the full traceback in the log for later diagnosis.
        log(traceback.format_exc(), level=ERROR)
        return False
    return True
@when_any('db.master.available', 'shared-db.available')
@when('leadership.is_leader',
'config.set.auto-generate-root-ca-cert')
@ -749,6 +762,9 @@ def takeover_cert_leadership():
'charm.vault.ca.ready',
'certificates.available')
def publish_ca_info():
if not client_approle_authorized():
log("Vault not authorized: Skipping publicsh_ca_info", "WARNING")
return
if is_unit_paused_set():
log("The Vault unit is paused, passing on publishing ca info.")
return
@ -778,6 +794,10 @@ def publish_global_client_cert():
(though some, like etcd, only block on the flag that it triggers but don't
actually use the cert), so we have to set it for now.
"""
if not client_approle_authorized():
log("Vault not authorized: Skipping publish_global_client_cert",
"WARNING")
return
cert_created = is_flag_set('charm.vault.global-client-cert.created')
reissue_requested = is_flag_set('certificates.reissue.global.requested')
tls = endpoint_from_flag('certificates.available')
@ -868,6 +888,10 @@ def tune_pki_backend():
@when('config.set.default-ttl')
@when('config.set.max-ttl')
def tune_pki_backend_config_changed():
if not client_approle_authorized():
log("Vault not authorized: Skipping tune_pki_backend_config_changed",
"WARNING")
return
if is_unit_paused_set():
log("The Vault unit is paused, passing on tunning pki backend.")
return

View File

@ -5,5 +5,4 @@ pbr
psycopg2
requests
jinja2
psutil

View File

@ -293,13 +293,16 @@ class TestHandlers(unit_tests.test_utils.CharmTestCase):
self.is_flag_set.assert_called_with('etcd.tls.available')
self.config.assert_called_with('disable-mlock')
@patch.object(handlers, 'client_approle_authorized')
@patch.object(handlers, '_assess_interface_groups')
@patch.object(handlers.vault, 'get_vault_health')
def test_assess_status(self, get_vault_health,
_assess_interface_groups):
_assess_interface_groups,
_client_approle_authorized):
self.is_flag_set.return_value = False
get_vault_health.return_value = self._health_response
_assess_interface_groups.return_value = []
_client_approle_authorized.return_value = True
self.config.return_value = False
self.service_running.return_value = True
handlers._assess_status()
@ -342,7 +345,7 @@ class TestHandlers(unit_tests.test_utils.CharmTestCase):
self.application_version_set.assert_called_with(
'Unknown')
self.status_set.assert_called_with(
'blocked', 'Vault health check failed')
'blocked', 'Unknown vault version')
def test_assess_status_invalid_channel(self):
statuses = {
@ -812,8 +815,11 @@ class TestHandlers(unit_tests.test_utils.CharmTestCase):
self.set_flag.assert_called_with('failed.to.start')
assert not _vault.get_client.called
@mock.patch.object(handlers, 'client_approle_authorized')
@mock.patch.object(handlers, 'vault_pki')
def test_publish_global_client_cert_already_gend(self, vault_pki):
def test_publish_global_client_cert_already_gend(
self, vault_pki, _client_approle_authorized):
_client_approle_authorized.return_value = True
tls = self.endpoint_from_flag.return_value
self.is_flag_set.side_effect = [True, False]
self.unitdata.kv().get.return_value = {'certificate': 'crt',
@ -825,8 +831,11 @@ class TestHandlers(unit_tests.test_utils.CharmTestCase):
'global-client-cert')
tls.set_client_cert.assert_called_with('crt', 'key')
@mock.patch.object(handlers, 'client_approle_authorized')
@mock.patch.object(handlers, 'vault_pki')
def test_publish_global_client_cert_reissue(self, vault_pki):
def test_publish_global_client_cert_reissue(
self, vault_pki, _client_approle_authorized):
_client_approle_authorized.return_value = True
self.config.return_value = {
'default-ttl': '3456h',
'max-ttl': '3456h',
@ -851,8 +860,11 @@ class TestHandlers(unit_tests.test_utils.CharmTestCase):
'global-client-cert.created')
tls.set_client_cert.assert_called_with('crt', 'key')
@mock.patch.object(handlers, 'client_approle_authorized')
@mock.patch.object(handlers, 'vault_pki')
def test_publish_global_client_certe(self, vault_pki):
def test_publish_global_client_certe(
self, vault_pki, _client_approle_authorized):
_client_approle_authorized.return_value = True
self.config.return_value = {
'default-ttl': '3456h',
'max-ttl': '3456h',
@ -957,16 +969,22 @@ class TestHandlers(unit_tests.test_utils.CharmTestCase):
assert not vault_pki.tune_pki_backend.called
assert not vault_pki.update_roles.called
@mock.patch.object(handlers, 'client_approle_authorized')
@mock.patch.object(handlers, 'vault_pki')
def test_tune_pki_backend_config_changed_paused(self, vault_pki):
def test_tune_pki_backend_config_changed_paused(
self, vault_pki, _client_approle_authorized):
_client_approle_authorized.return_value = True
self.is_unit_paused_set.return_value = True
handlers.tune_pki_backend_config_changed()
assert not vault_pki.tune_pki_backend.called
assert not vault_pki.update_roles.called
@mock.patch.object(handlers, 'client_approle_authorized')
@mock.patch.object(handlers, 'vault_pki')
def test_tune_pki_backend_config_changed_notrunning(self, vault_pki):
def test_tune_pki_backend_config_changed_notrunning(
self, vault_pki, _client_approle_authorized):
_client_approle_authorized.return_value = True
self.is_unit_paused_set.return_value = False
self.service_running.return_value = False