Make service check output more verbose.

The checks used to assert services' status after an upgrade don't log
much on success or failure, e.g.:
  Waiting for haproxy pcs resource to start
  FAILURE: Haproxy pcs resource didn't get started after reboot
This change displays some extra information to make
troubleshooting easier.

Change-Id: I50ca550d225e52ba44b76a70b1db1c34088b88d7
This commit is contained in:
Yurii Prokulevych 2018-06-21 11:20:56 +02:00
parent 2ce0f125e1
commit 0d471e91ef
5 changed files with 14 additions and 5 deletions

View File

@ -9,6 +9,7 @@ while true; do
echo "Waiting for galera pcs resource to start"
GALERA_RES=$(ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep ocf::heartbeat:galera | grep -vi FAILED | grep -i master | wc -l)
if [[ $GALERA_RES = 1 ]] || [[ $GALERA_RES > 2 ]]; then
echo "${GALERA_RES} instances of galera are started"
break
fi
sleep 3
@ -33,7 +34,8 @@ while true; do
(( elapsed_seconds += 3 ))
if [ $elapsed_seconds -ge $timeout_seconds ]; then
echo "FAILURE: galera pcs resource didn't get started after reboot. Workaround for BZ#1499677 applied."
exit 1
ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep 'ocf::heartbeat:galera'
exit 1
fi
done
else

View File

@ -12,13 +12,15 @@ if [[ $EXT_LB != 'false' ]]; then
echo "Waiting for haproxy pcs resource to start"
HAPROXY_RES=$(ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep haproxy-bundle | grep -i started | wc -l)
if [[ $HAPROXY_RES = 1 ]] || [[ $HAPROXY_RES > 2 ]]; then
echo "${HAPROXY_RES} instances of haproxy-bundle are started"
break
fi
sleep 3
(( elapsed_seconds += 3 ))
if [ $elapsed_seconds -ge $timeout_seconds ]; then
echo "FAILURE: Haproxy pcs resource didn't get started after reboot"
exit 1
ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep 'haproxy-bundle'
exit 1
fi
done
fi

View File

@ -14,13 +14,14 @@ sudo docker exec $(sudo docker ps | grep -oP haproxy-bundle.*) bash -c 'echo "sh
SSH
grep DOWN ~/haproxy.stats > /dev/null
if [[ $? != 0 ]]; then
echo "HAproxy backends are ready"
break
fi
sleep 3
(( elapsed_seconds += 3 ))
if [ $elapsed_seconds -ge $timeout_seconds ]; then
echo "FAILURE: $(grep DOWN haproxy.stats | awk -F ',' {'print $1'}) is down on $(grep DOWN haproxy.stats | awk -F ',' {'print $2'})"
exit 1
exit 1
fi
done
fi

View File

@ -9,12 +9,14 @@ while true; do
echo "Waiting for rabbitmq pcs resource to start"
RABBIT_RES=$(ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep ocf::heartbeat:rabbitmq-cluster | grep -vi FAILED | grep -i started | wc -l)
if [[ $RABBIT_RES = 1 ]] || [[ $RABBIT_RES > 2 ]]; then
echo "${RABBIT_RES} instances of rabbitmq pcs resource are started"
break
fi
sleep 3
(( elapsed_seconds += 3 ))
if [ $elapsed_seconds -ge $timeout_seconds ]; then
echo "FAILURE: Rabbitmq pcs resource didn't get started after reboot"
exit 1
ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep 'ocf::heartbeat:rabbitmq-cluster'
exit 1
fi
done

View File

@ -9,12 +9,14 @@ while true; do
echo "Waiting for redis pcs resource to start"
REDIS_RES=$(ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep ocf::heartbeat:redis | grep -vi FAILED | grep -i master | wc -l)
if [[ $REDIS_RES = 1 ]]; then
echo "Redis master is ready"
break
fi
sleep 3
(( elapsed_seconds += 3 ))
if [ $elapsed_seconds -ge $timeout_seconds ]; then
echo "FAILURE: redis pcs resource didn't get started after reboot"
exit 1
ssh -q -o StrictHostKeyChecking=no $OC_USER@$NODE_IP 'sudo pcs status --full' | grep 'ocf::heartbeat:redis'
exit 1
fi
done