From cfc2d4abd8962a82caab0232a3e52ce7472e512e Mon Sep 17 00:00:00 2001
From: Matthew Heler
Date: Sat, 3 Nov 2018 02:34:47 -0500
Subject: [PATCH] Document how to recover from a Ceph namespace deletion

Change-Id: Ib1b03cd046fbdad6f18478cfa9c9f0bf70ec9430
---
 doc/source/testing/ceph-resiliency/index.rst   |   1 +
 .../ceph-resiliency/namespace-deletion.rst     | 222 ++++++++++++++++++
 2 files changed, 223 insertions(+)
 create mode 100644 doc/source/testing/ceph-resiliency/namespace-deletion.rst

diff --git a/doc/source/testing/ceph-resiliency/index.rst b/doc/source/testing/ceph-resiliency/index.rst
index 3dc3ccd93..91f4ebb27 100644
--- a/doc/source/testing/ceph-resiliency/index.rst
+++ b/doc/source/testing/ceph-resiliency/index.rst
@@ -8,3 +8,4 @@ Ceph Resiliency
    README
    failure-domain
    validate-object-replication
+   namespace-deletion

diff --git a/doc/source/testing/ceph-resiliency/namespace-deletion.rst b/doc/source/testing/ceph-resiliency/namespace-deletion.rst
new file mode 100644
index 000000000..bc1285607
--- /dev/null
+++ b/doc/source/testing/ceph-resiliency/namespace-deletion.rst
@@ -0,0 +1,222 @@
+===============================
+3. Namespace deletion recovery
+===============================
+
+This document captures the steps to bring Ceph back up after its associated namespace has been deleted.
+
+3.1 Setup
+==========
+
+.. note::
+  Follow the OSH single node or multinode guide to bring up an OSH environment.
+
+3.2 Set up the OSH environment and check Ceph cluster health
+=============================================================
+
+.. note::
+  Ensure a healthy Ceph cluster is running.
+
+.. code-block:: console
+
+  kubectl exec -n ceph ceph-mon-dtw6m -- ceph -s
+    cluster:
+      id:     fbaf9ce8-5408-4fce-9bfe-bf7fb938474c
+      health: HEALTH_OK
+
+    services:
+      mon: 5 daemons, quorum osh-1,osh-2,osh-5,osh-4,osh-3
+      mgr: osh-3(active), standbys: osh-4
+      mds: cephfs-1/1/1 up {0=mds-ceph-mds-77dc68f476-jb5th=up:active}, 1 up:standby
+      osd: 15 osds: 15 up, 15 in
+
+    data:
+      pools:   18 pools, 182 pgs
+      objects: 21 objects, 2246 bytes
+      usage:   3025 MB used, 1496 GB / 1499 GB avail
+      pgs:     182 active+clean
+
+- The Ceph cluster is in a HEALTH_OK state with 5 MONs and 15 OSDs.
+
+3.3 Delete Ceph namespace
+==========================
+
+.. note::
+  Removing the namespace will delete all pods and secrets associated with Ceph.
+  !! DO NOT PROCEED WITH DELETING THE CEPH NAMESPACE IN A PRODUCTION ENVIRONMENT !!
+
+.. code-block:: console
+
+  CEPH_NAMESPACE="ceph"
+  MON_POD=$(kubectl get pods --namespace=${CEPH_NAMESPACE} \
+    --selector="application=ceph" --selector="component=mon" \
+    --no-headers | awk '{ print $1; exit }')
+
+  kubectl exec --namespace=${CEPH_NAMESPACE} ${MON_POD} -- ceph status \
+    | awk '/id:/{print $2}' | tee /tmp/ceph-fs-uuid.txt
+
+.. code-block:: console
+
+  kubectl delete namespace ${CEPH_NAMESPACE}
+
+.. code-block:: console
+
+  kubectl get pods --namespace ${CEPH_NAMESPACE} -o wide
+  No resources found.
+
+  kubectl get secrets --namespace ${CEPH_NAMESPACE}
+  No resources found.
+
+- The Ceph namespace is now deleted, and all of its associated resources report "No resources found."
+
+3.4 Reinstall Ceph charts
+==========================
+
+.. note::
+  These instructions are specific to a multinode environment.
+  For AIO environments, follow the development guide for reinstalling Ceph.
+
+.. code-block:: console
+
+  helm delete --purge ceph-openstack-config
+
+  for chart in $(helm list --namespace ${CEPH_NAMESPACE} | awk '/ceph-/{print $1}'); do
+    helm delete ${chart} --purge;
+  done
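+
+Before re-running the deployment script, it can help to confirm that the
+old releases are gone and that the namespace has finished terminating,
+since namespace deletion is asynchronous. The following is a minimal
+sketch of such a check, reusing the ``CEPH_NAMESPACE`` variable set
+earlier; the exact output depends on your environment.
+
+.. code-block:: console
+
+  # No ceph-* releases should be listed once the purge loop above has completed.
+  helm list --namespace ${CEPH_NAMESPACE}
+
+  # The namespace should be reported as not found (rather than stuck in a
+  # Terminating state) before the charts are reinstalled.
+  kubectl get namespace ${CEPH_NAMESPACE}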
+
+.. note::
+  It is normal for some pods not to come back online during the reinstall.
+  Only the ceph-mon helm chart is required.
+
+.. code-block:: console
+
+  cd /opt/openstack-helm-infra/
+  ./tools/deployment/multinode/030-ceph.sh
+
+3.5 Disable CephX authentication
+=================================
+
+.. note::
+  Wait until the MON pods are running before proceeding here.
+
+.. code-block:: console
+
+  mkdir -p /tmp/ceph/ceph-templates /tmp/ceph/extracted-keys
+
+  kubectl get -n ${CEPH_NAMESPACE} configmaps ceph-mon-etc -o=jsonpath='{.data.ceph\.conf}' > /tmp/ceph/ceph-mon.conf
+  sed '/\[global\]/a auth_client_required = none' /tmp/ceph/ceph-mon.conf | \
+    sed '/\[global\]/a auth_service_required = none' | \
+    sed '/\[global\]/a auth_cluster_required = none' > /tmp/ceph/ceph-mon-noauth.conf
+
+  kubectl --namespace ${CEPH_NAMESPACE} delete configmap ceph-mon-etc
+  kubectl --namespace ${CEPH_NAMESPACE} create configmap ceph-mon-etc --from-file=ceph.conf=/tmp/ceph/ceph-mon-noauth.conf
+
+  kubectl delete pod --namespace ${CEPH_NAMESPACE} -l application=ceph,component=mon
+
+.. note::
+  Wait until the MON pods are running before proceeding here.
+
+.. code-block:: console
+
+  MON_POD=$(kubectl get pods --namespace=${CEPH_NAMESPACE} \
+    --selector="application=ceph" --selector="component=mon" \
+    --no-headers | awk '{ print $1; exit }')
+
+  kubectl exec --namespace=${CEPH_NAMESPACE} ${MON_POD} -- ceph status
+
+- The Ceph cluster will not be healthy; it will be in a HEALTH_WARN or HEALTH_ERR state.
+
+3.6 Replace key secrets with ones extracted from a Ceph MON
+============================================================
+
+.. code-block:: console
+
+  tee /tmp/ceph/ceph-templates/mon <