Export crash dumps when Ceph daemons crash

This change configures the Ceph daemon pods so that
/var/lib/ceph/crash is backed by a hostPath volume, which persists
across pod restarts. Crash dumps are therefore retained on the host
and can be examined post-mortem to understand why a daemon crashed.

Change-Id: I53277848f79a405b0809e0e3f19d90bbb80f3df8
This commit is contained in:
Stephen Taylor 2021-06-29 07:58:23 -06:00
parent a2b9fe53e3
commit 07ceecd8d7
17 changed files with 65 additions and 8 deletions

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph Client
name: ceph-client
version: 0.1.19
version: 0.1.20
home: https://github.com/ceph/ceph-client
...

View File

@ -27,7 +27,7 @@ for keyring in ${OSD_BOOTSTRAP_KEYRING} ${MDS_BOOTSTRAP_KEYRING}; do
done
# Let's create the ceph directories
for DIRECTORY in mds tmp mgr; do
for DIRECTORY in mds tmp mgr crash; do
mkdir -p "/var/lib/ceph/${DIRECTORY}"
done

View File

@ -74,6 +74,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
containers:
- name: ceph-mds
{{ tuple $envAll "ceph_mds" | include "helm-toolkit.snippets.image" | indent 10 }}
@ -136,6 +139,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
volumes:
- name: pod-tmp
emptyDir: {}
@ -154,6 +160,10 @@ spec:
defaultMode: 0555
- name: pod-var-lib-ceph
emptyDir: {}
- name: pod-var-lib-ceph-crash
hostPath:
path: /var/lib/openstack-helm/ceph/crash
type: DirectoryOrCreate
- name: ceph-client-admin-keyring
secret:
secretName: {{ .Values.secrets.keyrings.admin }}

View File

@ -77,6 +77,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
containers:
- name: ceph-mgr
{{ tuple $envAll "ceph_mgr" | include "helm-toolkit.snippets.image" | indent 10 }}
@ -166,6 +169,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
- name: ceph-client-bin
mountPath: /tmp/utils-checkPGs.py
subPath: utils-checkPGs.py
@ -192,6 +198,10 @@ spec:
defaultMode: 0444
- name: pod-var-lib-ceph
emptyDir: {}
- name: pod-var-lib-ceph-crash
hostPath:
path: /var/lib/openstack-helm/ceph/crash
type: DirectoryOrCreate
- name: ceph-client-admin-keyring
secret:
secretName: {{ .Values.secrets.keyrings.admin }}

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph Mon
name: ceph-mon
version: 0.1.9
version: 0.1.10
home: https://github.com/ceph/ceph
...

View File

@ -27,7 +27,7 @@ for keyring in ${OSD_BOOTSTRAP_KEYRING} ${MDS_BOOTSTRAP_KEYRING} ; do
done
# Let's create the ceph directories
for DIRECTORY in mon osd mds radosgw tmp mgr; do
for DIRECTORY in mon osd mds radosgw tmp mgr crash; do
mkdir -p "/var/lib/ceph/${DIRECTORY}"
done

View File

@ -99,6 +99,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
- name: ceph-log-ownership
{{ tuple $envAll "ceph_mon" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ dict "envAll" $envAll "application" "mon" "container" "ceph_log_ownership" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
@ -228,6 +231,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
- name: pod-var-log
mountPath: /var/log/ceph
readOnly: false
@ -252,6 +258,10 @@ spec:
- name: pod-var-lib-ceph
hostPath:
path: {{ .Values.conf.storage.mon.directory }}
- name: pod-var-lib-ceph-crash
hostPath:
path: /var/lib/openstack-helm/ceph/crash
type: DirectoryOrCreate
- name: ceph-client-admin-keyring
secret:
secretName: {{ .Values.secrets.keyrings.admin }}

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph OSD
name: ceph-osd
version: 0.1.24
version: 0.1.25
home: https://github.com/ceph/ceph
...

View File

@ -21,7 +21,7 @@ export LC_ALL=C
mkdir -p "$(dirname "${OSD_BOOTSTRAP_KEYRING}")"
# Let's create the ceph directories
for DIRECTORY in osd tmp; do
for DIRECTORY in osd tmp crash; do
mkdir -p "/var/lib/ceph/${DIRECTORY}"
done

View File

@ -130,6 +130,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
- name: pod-var-lib-ceph-tmp
mountPath: /var/lib/ceph/tmp
readOnly: false
@ -251,6 +254,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
- name: pod-var-lib-ceph-tmp
mountPath: /var/lib/ceph/tmp
readOnly: false
@ -411,6 +417,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
- name: pod-var-lib-ceph-tmp
mountPath: /var/lib/ceph/tmp
readOnly: false
@ -455,6 +464,10 @@ spec:
emptyDir: {}
- name: pod-var-lib-ceph
emptyDir: {}
- name: pod-var-lib-ceph-crash
hostPath:
path: /var/lib/openstack-helm/ceph/crash
type: DirectoryOrCreate
- name: pod-var-lib-ceph-tmp
hostPath:
path: /var/lib/openstack-helm/ceph/var-tmp

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph RadosGW
name: ceph-rgw
version: 0.1.11
version: 0.1.12
home: https://github.com/ceph/ceph
...

View File

@ -25,7 +25,7 @@ for keyring in ${RGW_BOOTSTRAP_KEYRING}; do
done
# Let's create the ceph directories
for DIRECTORY in radosgw tmp; do
for DIRECTORY in radosgw tmp crash; do
mkdir -p "/var/lib/ceph/${DIRECTORY}"
done

View File

@ -92,6 +92,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
- name: ceph-rgw-init
{{ tuple $envAll "ceph_rgw" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.rgw | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
@ -182,6 +185,9 @@ spec:
- name: pod-var-lib-ceph
mountPath: /var/lib/ceph
readOnly: false
- name: pod-var-lib-ceph-crash
mountPath: /var/lib/ceph/crash
readOnly: false
{{- dict "enabled" .Values.manifests.certificates "name" $tls_secret "path" "/etc/tls" | include "helm-toolkit.snippets.tls_volume_mount" | indent 12 }}
volumes:
- name: pod-tmp
@ -201,6 +207,10 @@ spec:
defaultMode: 0444
- name: pod-var-lib-ceph
emptyDir: {}
- name: pod-var-lib-ceph-crash
hostPath:
path: /var/lib/openstack-helm/ceph/crash
type: DirectoryOrCreate
- name: ceph-bootstrap-rgw-keyring
secret:
secretName: {{ .Values.secrets.keyrings.rgw }}

View File

@ -20,4 +20,5 @@ ceph-client:
- 0.1.17 Add pool rename support for Ceph pools
- 0.1.18 Add pool delete support for Ceph pools
- 0.1.19 Use full image ref for docker official images
- 0.1.20 Export crash dumps when Ceph daemons crash
...

View File

@ -10,4 +10,5 @@ ceph-mon:
- 0.1.7 remove deprecated svc annotation tolerate-unready-endpoints
- 0.1.8 Use full image ref for docker official images
- 0.1.9 Remove unnecessary parameters for ceph-mon
- 0.1.10 Export crash dumps when Ceph daemons crash
...

View File

@ -25,4 +25,5 @@ ceph-osd:
- 0.1.22 Refactor Ceph OSD Init Scripts - Second PS
- 0.1.23 Use full image ref for docker official images
- 0.1.24 Ceph OSD Init Improvements
- 0.1.25 Export crash dumps when Ceph daemons crash
...

View File

@ -12,4 +12,5 @@ ceph-rgw:
- 0.1.9 Use full image ref for docker official images
- 0.1.10 Fix a bug in placement target deletion for new targets
- 0.1.11 Change s3 auth order to use local before external
- 0.1.12 Export crash dumps when Ceph daemons crash
...