Make service check output more verbose.

Checks used to assert services' status post upgrade don't log much on
success or failure, e.g.:

    Waiting for haproxy pcs resource to start
    FAILURE: Haproxy pcs resource didn't get started after reboot

This change adds some extra info to be displayed to make
troubleshooting easier.

Change-Id: I50ca550d225e52ba44b76a70b1db1c34088b88d7
2018-06-21 11:20:56 +02:00 · 2018-06-21 11:20:56 +02:00 · 0d471e91ef
parent 2ce0f125e1
commit 0d471e91ef
5 changed files with 14 additions and 5 deletions
--- a/templates/check_service_galera.sh.j2
+++ b/templates/check_service_galera.sh.j2
@@ -9,6 +9,7 @@ while true; do
    echo "Waiting for galera pcs resource to start"
    GALERA_RES=$(ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep ocf::heartbeat:galera | grep -vi FAILED | grep -i master | wc -l)
    if [[ $GALERA_RES = 1 ]] || [[ $GALERA_RES > 2 ]]; then
+        echo "${GALERA_RES} instances of galera are started"
        break
    fi
    sleep 3
@@ -33,7 +34,8 @@ while true; do
                (( elapsed_seconds += 3 ))
                if [ $elapsed_seconds -ge $timeout_seconds ]; then
                    echo "FAILURE: galera pcs resource didn't get started after reboot. Workaround for BZ#1499677 applied."
-                exit 1
+                    ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep 'ocf::heartbeat:galera'
+                    exit 1
                fi
            done
        else
--- a/templates/check_service_haproxy.sh.j2
+++ b/templates/check_service_haproxy.sh.j2
@@ -12,13 +12,15 @@ if [[ $EXT_LB != 'false' ]]; then
        echo "Waiting for haproxy pcs resource to start"
        HAPROXY_RES=$(ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep haproxy-bundle | grep -i started | wc -l)
        if [[ $HAPROXY_RES = 1 ]] || [[ $HAPROXY_RES > 2 ]]; then
+            echo "${HAPROXY_RES} instances of haproxy-bundle are started"
            break
        fi
        sleep 3
        (( elapsed_seconds += 3 ))
        if [ $elapsed_seconds -ge $timeout_seconds ]; then
            echo "FAILURE: Haproxy pcs resource didn't get started after reboot"
-        exit 1
+            ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep 'haproxy-bundle'
+            exit 1
        fi
    done
 fi
--- a/templates/check_service_haproxy_backend.sh.j2
+++ b/templates/check_service_haproxy_backend.sh.j2
@@ -14,13 +14,14 @@ sudo docker exec $(sudo docker ps | grep -oP haproxy-bundle.*) bash -c 'echo "sh
 SSH
        grep DOWN ~/haproxy.stats > /dev/null
        if [[ $? != 0 ]]; then
+            echo "HAproxy backends are ready"
            break
        fi
        sleep 3
        (( elapsed_seconds += 3 ))
        if [ $elapsed_seconds -ge $timeout_seconds ]; then
            echo "FAILURE: $(grep DOWN haproxy.stats | awk -F ',' {'print $1'}) is down on $(grep DOWN haproxy.stats | awk -F ',' {'print $2'})"
-        exit 1
+            exit 1
        fi
    done
 fi
--- a/templates/check_service_rabbitmq.sh.j2
+++ b/templates/check_service_rabbitmq.sh.j2
@@ -9,12 +9,14 @@ while true; do
    echo "Waiting for rabbitmq pcs resource to start"
    RABBIT_RES=$(ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep ocf::heartbeat:rabbitmq-cluster | grep -vi FAILED | grep -i started | wc -l)
    if [[ $RABBIT_RES = 1 ]] || [[ $RABBIT_RES > 2 ]]; then
+        echo "${RABBIT_RES} instances of rabbitmq pcs resource are started"
        break
    fi
    sleep 3
    (( elapsed_seconds += 3 ))
    if [ $elapsed_seconds -ge $timeout_seconds ]; then
        echo "FAILURE: Rabbitmq pcs resource didn't get started after reboot"
-    exit 1
+        ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep 'ocf::heartbeat:rabbitmq-cluster'
+        exit 1
    fi
 done
--- a/templates/check_service_redis.sh.j2
+++ b/templates/check_service_redis.sh.j2
@@ -9,12 +9,14 @@ while true; do
    echo "Waiting for redis pcs resource to start"
    REDIS_RES=$(ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep ocf::heartbeat:redis | grep -vi FAILED | grep -i master | wc -l)
    if [[ $REDIS_RES = 1 ]]; then
+        echo "Redis master is ready"
        break
    fi
    sleep 3
    (( elapsed_seconds += 3 ))
    if [ $elapsed_seconds -ge $timeout_seconds ]; then
        echo "FAILURE: redis pcs resource didn't get started after reboot"
-    exit 1
+        ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep 'ocf::heartbeat:redis'
+        exit 1
    fi
 done