Save the crash module auth key

Read the key set on the mon relation,
and use ceph-authtool to save it to a keyring,
for use by the crash module for crash reporting.

When this auth key is set, the crash module (enabled by default)
will update ceph-mon with a report.
This also produces a neat summary of recent crashes
that can be viewed with `ceph health detail`.
For example:

```
$ juju ssh ceph-mon/leader -- sudo ceph health detail

HEALTH_WARN 1 daemons have recently crashed
[WRN] RECENT_CRASH: 1 daemons have recently crashed
    osd.1 crashed on host node-3 at 2023-01-04T05:25:18.218628Z
```

ref. https://docs.ceph.com/en/latest/mgr/crash/

See also https://review.opendev.org/c/openstack/charm-ceph-mon/+/869138
for the change that sets the client_crash_key relation data.

Depends-On: https://review.opendev.org/c/openstack/charm-ceph-mon/+/869138

Closes-Bug: #2000630
Change-Id: I77c84c368e6665e4988ebe9a735f000f99d0b78e
This commit is contained in:
Samuel Walladge 2023-01-04 15:59:04 +10:30
parent d13e6ba384
commit 97be046f9b
4 changed files with 19 additions and 1 deletions

View File

@ -80,6 +80,7 @@ from utils import (
import_osd_bootstrap_key,
import_osd_upgrade_key,
import_osd_removal_key,
import_client_crash_key,
get_host_ip,
get_networks,
assert_charm_supports_ipv6,
@ -653,6 +654,7 @@ def mon_relation():
bootstrap_key = relation_get('osd_bootstrap_key')
upgrade_key = relation_get('osd_upgrade_key')
removal_key = relation_get('osd_disk_removal_key')
client_crash_key = relation_get('client_crash_key')
if get_fsid() and get_auth() and bootstrap_key:
log('mon has provided conf- scanning disks')
emit_cephconf()
@ -664,6 +666,8 @@ def mon_relation():
_, settings, _ = (ch_ceph.CephOSDConfContext()
.filter_osd_from_mon_settings())
ceph.apply_osd_settings(settings)
if client_crash_key:
import_client_crash_key(client_crash_key)
else:
log('mon cluster has not yet provided conf')

View File

@ -73,6 +73,7 @@ except ImportError:
_bootstrap_keyring = "/var/lib/ceph/bootstrap-osd/ceph.keyring"
_upgrade_keyring = "/var/lib/ceph/osd/ceph.client.osd-upgrade.keyring"
_removal_keyring = "/var/lib/ceph/osd/ceph.client.osd-removal.keyring"
_client_crash_keyring = "/var/lib/ceph/osd/ceph.client.crash.keyring"
def is_osd_bootstrap_ready():
@ -129,6 +130,16 @@ def import_osd_removal_key(key):
_import_key(key, _removal_keyring, 'client.osd-removal')
def import_client_crash_key(key):
    """Store the given cephx key in the client.crash keyring.

    Thin wrapper that hands the key to ``_import_key`` together with the
    crash keyring path and the ``client.crash`` entity name.

    :param key: The cephx key to add to the client.crash keyring
    :type key: str
    :raises: subprocess.CalledProcessError
    """
    entity_name = 'client.crash'
    _import_key(key, _client_crash_keyring, entity_name)
def render_template(template_name, context, template_dir=TEMPLATES_DIR):
"""Render Jinja2 template.

View File

@ -695,7 +695,7 @@ def get_local_osd_ids():
try:
dirs = os.listdir(osd_path)
for osd_dir in dirs:
osd_id = osd_dir.split('-')[1]
osd_id = osd_dir.split('-')[1] if '-' in osd_dir else ''
if (_is_int(osd_id) and
filesystem_mounted(os.path.join(
os.sep, osd_path, osd_dir))):

View File

@ -54,6 +54,9 @@ keyring = /var/lib/ceph/osd/ceph.client.osd-upgrade.keyring
[client.osd-removal]
keyring = /var/lib/ceph/osd/ceph.client.osd-removal.keyring
[client.crash]
keyring = /var/lib/ceph/osd/ceph.client.crash.keyring
[mon]
keyring = /var/lib/ceph/mon/$cluster-$id/keyring