From 1e84d3f7149e57a3a9d3774badb77b5b391ccea9 Mon Sep 17 00:00:00 2001 From: Stephen Taylor Date: Wed, 20 Mar 2024 12:08:42 -0600 Subject: [PATCH] [rook-ceph] Add a script to migrate Ceph clusters to Rook This change adds a deployment script that can be used to migrate a Ceph cluster deployed with the legacy openstack-helm-infra Ceph charts to Rook. This process is disruptive. The Ceph cluster goes down and comes back up multiple times during the migration, but the end result is a Rook-deployed Ceph cluster with the original cluster FSID and all OSD data intact. Change-Id: Ied8ff94f25cd792a9be9f889bb6fdabc45a57f2e --- tools/deployment/ceph/migrate-to-rook-ceph.sh | 253 ++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100755 tools/deployment/ceph/migrate-to-rook-ceph.sh diff --git a/tools/deployment/ceph/migrate-to-rook-ceph.sh b/tools/deployment/ceph/migrate-to-rook-ceph.sh new file mode 100755 index 000000000..3b00b2aa6 --- /dev/null +++ b/tools/deployment/ceph/migrate-to-rook-ceph.sh @@ -0,0 +1,253 @@ +#!/bin/bash + +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +set -x + +# These variables can be set prior to running the script to deploy a specific +# Ceph release using a specific Rook release. The namespaces for the Rook +# operator and the Ceph cluster may also be set, along with the YAML definition +# files that should be used for the Rook operator and Ceph cluster Helm charts. +# The default values deploy the Rook operator in the rook-ceph namespace and +# the Ceph cluster in the ceph namespace using rook-operator.yaml and +# rook-ceph.yaml in the current directory. +ROOK_RELEASE=${ROOK_RELEASE:-1.13.7} +CEPH_RELEASE=${CEPH_RELEASE:-18.2.2} +ROOK_CEPH_NAMESPACE=${ROOK_CEPH_NAMESPACE:-rook-ceph} +CEPH_NAMESPACE=${CEPH_NAMESPCE:-ceph} +ROOK_OPERATOR_YAML=${ROOK_OPERATOR_YAML:-rook-operator.yaml} +ROOK_CEPH_YAML=${ROOK_CEPH_YAML:-rook-ceph.yaml} + +# Return a list of unique status strings for pods for a specified application +# (Pods with the same status will return a single status) +function app_status() { + kubectl -n ${CEPH_NAMESPACE} get pods -l app=${1} -o json | jq -r '.items[].status.phase' | sort | uniq +} + +# Function to wait for the initial Rook Ceph deployment to complete +function wait_for_initial_rook_deployment() { + set +x + echo "Waiting for initial Rook Ceph cluster deployment..." + + # The initial deployment can't deploy OSDs or RGW + while [[ "$(app_status rook-ceph-mon)" != "Running" || \ + "$(app_status rook-ceph-mgr)" != "Running" || \ + "$(app_status rook-ceph-mds)" != "Running" || \ + "$(app_status rook-ceph-tools)" != "Running" || \ + "$(app_status rook-ceph-exporter)" != "Running" || \ + "$(app_status rook-ceph-osd-prepare)" != "Succeeded" ]] + do + sleep 5 + done + set -x +} + +# Function to wait for a full cluster deployment +function wait_for_full_rook_deployment() { + set +x + echo "Waiting for full Rook Ceph cluster deployment..." + + # Look for everything from the initial deployment plus OSDs and RGW + while [[ "$(app_status rook-ceph-mon)" != "Running" || \ + "$(app_status rook-ceph-mgr)" != "Running" || \ + "$(app_status rook-ceph-mds)" != "Running" || \ + "$(app_status rook-ceph-tools)" != "Running" || \ + "$(app_status rook-ceph-exporter)" != "Running" || \ + "$(app_status rook-ceph-osd-prepare)" != "Succeeded" || \ + "$(app_status rook-ceph-osd)" != "Running" || \ + "$(app_status rook-ceph-rgw)" != "Running" ]] + do + sleep 5 + done + set -x +} + +# Function to wait for all pods except rook-ceph-tools to terminate +function wait_for_terminate() { + set +x + echo "Waiting for pods to terminate..." + + while [[ $(kubectl -n ${CEPH_NAMESPACE} get pods | grep -c "Running") -gt 1 ]] + do + sleep 5 + done + set -x +} + +# Function to wait for Ceph to reach a HEALTH_OK state +function wait_for_health_checks() { + CEPH_NAMESPACE=${1} + CLIENT_POD=${2} + set +x + echo "Waiting for the Ceph cluster to reach HEALTH_OK with all of the expectd resources..." + + # Time out each loop after ~15 minutes + for retry in {0..180} + do + if [[ $(kubectl -n ${CEPH_NAMESPACE} exec ${CLIENT_POD} -- ceph mon stat -f json | jq -r '.quorum[].name' | wc -l) -eq ${MON_COUNT} && + $(kubectl -n ${CEPH_NAMESPACE} exec ${CLIENT_POD} -- ceph mgr count-metadata name | jq '.unknown') -eq ${MGR_COUNT} && + $(kubectl -n ${CEPH_NAMESPACE} exec ${CLIENT_POD} -- ceph osd stat -f json | jq '.num_up_osds') -eq ${OSD_COUNT} ]] + then + break + fi + sleep 5 + done + + for retry in {0..180} + do + if [[ "$(kubectl -n ${CEPH_NAMESPACE} exec ${CLIENT_POD} -- ceph health)" == "HEALTH_OK" ]] + then + break + fi + sleep 5 + done + + kubectl -n ${CEPH_NAMESPACE} exec ${CLIENT_POD} -- ceph status + set -x +} + +# Save a legacy ceph-mon host and the existing cluster FSID for later +export MON_POD=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mon -o json | jq -r '.items[0].metadata.name') +export FSID=$(kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph fsid) +export OLD_MON_HOST=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mon -o json | jq -r '.items[0].spec.nodeName') +export OLD_MON_HOST_IP=$(kubectl get nodes -o json | jq -r '.items[] | select(.metadata.name == env.OLD_MON_HOST) | .status.addresses | .[] | select(.type == "InternalIP") | .address') +export MON_COUNT=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mon -o json | jq '.items | length') +export MGR_COUNT=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=mgr -o json | jq '.items | length') +export OSD_COUNT=$(kubectl -n ${CEPH_NAMESPACE} get pods -l component=osd -o json | jq '.items | length') + +# Rename CephFS pools to match the expected names for Rook CephFS +FS_SPEC="$(kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph fs ls -f json 2> /dev/null)" +for fs in $(echo $FS_SPEC | jq -r '.[].name') +do + EXPECTED_METADATA_POOL="${fs}-metadata" + METADATA_POOL=$(echo ${FS_SPEC} | jq -r ".[] | select(.name==\"${fs}\") | .metadata_pool") + + if [[ "${METADATA_POOL}" != "${EXPECTED_METADATA_POOL}" ]] + then + kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph osd pool rename ${METADATA_POOL} ${EXPECTED_METADATA_POOL} + fi + + EXPECTED_DATA_POOL="${fs}-data" + # NOTE: Only one data pool must have the expected name. Only the first one is + # checked here. If it is renamed and another pool with the same name already + # exists, the rename will fail and there is no further action needed. + DATA_POOL=$(echo ${FS_SPEC} | jq -r ".[] | select(.name==\"${fs}\") | .data_pools[0]") + + if [[ "${DATA_POOL}" != "${EXPECTED_DATA_POOL}" ]] + then + kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- ceph osd pool rename ${DATA_POOL} ${EXPECTED_DATA_POOL} + fi +done + +# Destroy resources in the Ceph namespace, delete Helm charts, and remove Ceph-related node labels +for resource in cj deploy ds service job +do + kubectl -n ${CEPH_NAMESPACE} get ${resource} -o json | jq -r '.items[].metadata.name' | xargs kubectl -n ${CEPH_NAMESPACE} delete ${resource} +done +helm -n ${CEPH_NAMESPACE} delete ceph-provisioners +helm -n ${CEPH_NAMESPACE} delete ceph-client +helm -n ${CEPH_NAMESPACE} delete ceph-mon +helm -n ${CEPH_NAMESPACE} delete ceph-osd +for node in $(kubectl get nodes -o json | jq -r '.items[].metadata.name' | xargs) +do + kubectl label node ${node} ceph-mds- ceph-mgr- ceph-mon- ceph-osd- ceph-rgw- +done + +# Use rook-helm to deploy a new Ceph cluster +helm repo add rook-release https://charts.rook.io/release +helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph --version ${ROOK_RELEASE} -f ${ROOK_OPERATOR_YAML} +helm upgrade --install --create-namespace --namespace ceph rook-ceph-cluster --set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster --version ${ROOK_RELEASE} -f ${ROOK_CEPH_YAML} +wait_for_initial_rook_deployment + +# Retrieve the keyring from the new mon pod and save its host for further work +export MON_POD=$(kubectl -n ${CEPH_NAMESPACE} get pods -l app=rook-ceph-mon -o json | jq -r '.items[0].metadata.name') +kubectl -n ${CEPH_NAMESPACE} exec ${MON_POD} -- cat /etc/ceph/keyring-store/keyring > /tmp/mon-a.keyring +export MON_HOST=$(kubectl -n ${CEPH_NAMESPACE} get pods -l app=rook-ceph-mon -o json | jq -r '.items[0].spec.nodeName') +export MON_HOST_IP=$(kubectl get nodes -o json | jq -r '.items[] | select(.metadata.name == env.MON_HOST) | .status.addresses | .[] | select(.type == "InternalIP") | .address') + +# Shut down the Rook operator, delete the rook-ceph deployments, and get the new rook-ceph-mon IP address +kubectl -n ${ROOK_CEPH_NAMESPACE} scale deploy rook-ceph-operator --replicas=0 +kubectl -n ${CEPH_NAMESPACE} get deploy -o json | jq -r '.items[] | select(.metadata.name != "rook-ceph-tools") | .metadata.name' | xargs kubectl -n ${CEPH_NAMESPACE} delete deploy +MON_IP=$(kubectl -n ${CEPH_NAMESPACE} get service rook-ceph-mon-a -o json | jq -r '.spec.clusterIP') +wait_for_terminate + +# Download the old mon store and update its key to the new one +ssh ${MON_HOST_IP} "sudo rm -rf /var/lib/rook/mon-a/data" +ssh ${OLD_MON_HOST_IP} "sudo chmod -R a+rX /var/lib/openstack-helm/ceph/mon/mon/ceph-${OLD_MON_HOST}" +scp -rp ${OLD_MON_HOST_IP}:/var/lib/openstack-helm/ceph/mon/mon/ceph-${OLD_MON_HOST} /tmp +mv /tmp/ceph-${OLD_MON_HOST} /tmp/mon-a +grep -A2 "\[mon\.\]" /tmp/mon-a.keyring > /tmp/mon-a/keyring + +# Generate a script to rewrite the monmap in the old mon store +cat > /tmp/mon-a/fix-monmap.sh < /tmp/keyring" +kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e \" key = ${CLIENT_KEY}\" >> /tmp/keyring" +kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e ' caps mds = \"allow *\"' >> /tmp/keyring" +kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e ' caps mon = \"allow *\"' >> /tmp/keyring" +kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e ' caps osd = \"allow *\"' >> /tmp/keyring" +kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- bash -c "echo -e ' caps mgr = \"allow *\"' >> /tmp/keyring" +kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- ceph auth import -i /tmp/keyring +kubectl -n ${CEPH_NAMESPACE} exec ${TOOLS_POD} -- rm /tmp/keyring + +# Remove the auth config options to re-enable authentication +kubectl -n ${CEPH_NAMESPACE} get cm rook-config-override -o yaml | \ +sed '/ auth_cluster_required = none/d' | \ +sed '/ auth_service_required = none/d' | \ +sed '/ auth_client_required = none/d' | \ +sed '/ auth_supported = none/d' | \ +kubectl apply -f - + +# Restart the Rook operator and Ceph cluster with the new config +kubectl -n ${ROOK_CEPH_NAMESPACE} scale deploy rook-ceph-operator --replicas=0 +kubectl -n ${CEPH_NAMESPACE} get deploy -o json | jq -r '.items[] | select(.metadata.name != "rook-ceph-tools") | .metadata.name' | xargs kubectl -n ${CEPH_NAMESPACE} delete deploy +wait_for_terminate +kubectl -n ${ROOK_CEPH_NAMESPACE} scale deploy rook-ceph-operator --replicas=1 +wait_for_full_rook_deployment + +# Scale the mon and mgr deployments to original replica counts +kubectl -n ${CEPH_NAMESPACE} get cephcluster ceph -o json | \ +jq ".spec.mon.count = ${MON_COUNT} | .spec.mgr.count = ${MGR_COUNT}" | \ +kubectl apply -f - +wait_for_health_checks ${CEPH_NAMESPACE} ${TOOLS_POD}