Improve Vault startup handling

The `@when_file_changed` decorator is not considered reliable.
Additionally, the way it was being used led to a race condition in which
the Vault service might never be started. This change also detects a
failed Vault start and reports it more clearly.

Change-Id: If6153377cd516ed8121e09da627905036128a6ec
This commit is contained in:
Cory Johns 2019-02-07 17:43:02 -05:00
parent f5fa5d8758
commit 102b222fce
3 changed files with 53 additions and 7 deletions

View File

@ -2,6 +2,7 @@ import base64
import psycopg2
import subprocess
import tenacity
import traceback
import yaml
from pathlib import Path
@ -48,7 +49,7 @@ from charms.reactive import (
remove_state,
set_state,
when,
when_file_changed,
any_file_changed,
when_not,
when_any,
)
@ -188,6 +189,9 @@ def configure_vault(context):
log("Opening vault port", level=DEBUG)
open_port(8200)
set_flag('configured')
if any_file_changed([VAULT_CONFIG, VAULT_SYSTEMD_CONFIG]):
# force a restart if config has changed
clear_flag('started')
@when('snap.installed.vault')
@ -369,12 +373,25 @@ def cluster_connected(hacluster):
hacluster.bind_resources()
@when_file_changed(VAULT_CONFIG, VAULT_SYSTEMD_CONFIG)
def file_change_auto_unlock_mode():
log("Calling opportunistic_restart", level=DEBUG)
@when('configured')
@when_not('started')
def start_vault():
    """Start (or restart) Vault and record whether the service came up.

    Runs while the 'configured' flag is set and 'started' is not. After
    asking for a restart, the 'vault' service is polled (up to 10 attempts,
    exponential back-off capped at 10) until ``service_running`` reports it
    as running.

    On success the 'started' flag is set, 'failed.to.start' is cleared and,
    when the 'totally-unsecure-auto-unlock' option is enabled, Vault is
    prepared. On failure only 'failed.to.start' is set, leaving 'started'
    unset.
    """
    # start or restart vault
    vault.opportunistic_restart()

    @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, max=10),
                    stop=tenacity.stop_after_attempt(10),
                    retry=tenacity.retry_if_result(lambda b: not b))
    def _check_vault_running():
        return service_running('vault')

    try:
        running = _check_vault_running()
    except tenacity.RetryError:
        # Once the stop condition is hit tenacity raises RetryError rather
        # than returning the last (falsy) result, so exhausting the retries
        # must be translated into "not running" here; otherwise the failure
        # branch below could never be reached.
        running = False

    if not running:
        set_flag('failed.to.start')
        return

    set_flag('started')
    clear_flag('failed.to.start')
    # Only prepare Vault once the service is confirmed to be running.
    if config('totally-unsecure-auto-unlock'):
        vault.prepare_vault()
@when('leadership.is_leader')
@ -590,9 +607,19 @@ def _assess_status():
"Set complete when finished.")
return
if is_flag_set('failed.to.start'):
status_set("blocked",
"Vault failed to start; check journalctl -u vault")
return
health = None
if service_running('vault'):
health = vault.get_vault_health()
try:
health = vault.get_vault_health()
except Exception:
log(traceback.format_exc(), level=ERROR)
status_set('blocked', 'Vault health check failed')
return
else:
status_set('blocked', 'Vault service not running')
return

View File

@ -33,3 +33,7 @@ commands =
[testenv:venv]
commands = {posargs}
[flake8]
# E402 ignore necessary for path append before sys module import in actions
ignore = E402,W504

View File

@ -736,3 +736,18 @@ class TestHandlers(unit_tests.test_utils.CharmTestCase):
handlers.tune_pki_backend()
vault_pki.tune_pki_backend.assert_called_once_with()
self.set_flag.assert_called_once_with('pki.backend.tuned')
@mock.patch.object(handlers, 'config')
@mock.patch.object(handlers, 'clear_flag')
@mock.patch.object(handlers, 'set_flag')
@mock.patch.object(handlers.vault, 'prepare_vault')
@mock.patch.object(handlers.vault, 'opportunistic_restart')
@mock.patch.object(handlers, 'service_running')
def test_start_vault(self, mock_service_running, mock_restart,
                     mock_prepare_vault, mock_set_flag, mock_clear_flag,
                     mock_config):
    """start_vault polls until the service is up, then flags and prepares.

    The patched ``service_running`` reports False on the first poll and
    True on the second, so the handler is expected to poll exactly twice
    before setting 'started' and preparing Vault once.
    """
    # Arrange: every config option reads as enabled, and the service only
    # becomes visible as running on the second poll.
    mock_config.return_value = True
    mock_service_running.side_effect = [False, True]

    # Act
    handlers.start_vault()

    # Assert
    assert mock_service_running.call_count == 2
    mock_set_flag.assert_called_once_with('started')
    mock_prepare_vault.assert_called_once_with()