Fix race condition for pcs

Running "pcs resource restart" while some resources are still restarting
or initializing may cause both operations to fail. So change the code to
wait until all ocf resources are started before invoking
"pcs resource restart" to restart p_ceilometer-agent-central.

Change-Id: Ief3557462ed8a017bd4cc69336181147ed61244d
This commit is contained in:
Jianghua Wang 2016-10-13 18:36:38 +08:00
parent f2b2df6676
commit 77897d0ca5
2 changed files with 25 additions and 1 deletions

View File

@ -50,9 +50,33 @@ EOF
service nova-consoleauth restart
}
function wait_ocf_resource_started {
    # Wait until every ocf::fuel resource listed by "pcs resource show"
    # reports "Started".
    #
    # Arguments (both optional; the defaults match the previous hard-coded
    # values):
    #   $1 - maximum number of seconds to wait (default 300)
    #   $2 - polling interval in seconds, must be positive (default 10)
    # Globals:
    #   LOG_FILE - progress and error messages are appended to this file
    # Returns:
    #   0 as soon as no ocf::fuel resource line lacks "Started".
    #   On timeout an error is written to LOG_FILE and stderr and the whole
    #   script is terminated with "exit 1".
    #
    # All working variables are local so that the caller's globals (e.g. a
    # TIMEOUT or INTERVAL defined elsewhere) are never overwritten.
    local timeout=${1:-300}
    local interval=${2:-10}
    local remain_time=$timeout
    local status

    while [ "${remain_time}" -gt 0 ]; do
        if ! status=$(pcs resource show); then
            # A failing pcs prints nothing, which would otherwise look like
            # "nothing is pending"; treat it as not ready and poll again.
            echo "$(date): 'pcs resource show' failed; will retry." >> "$LOG_FILE"
        elif ! printf '%s\n' "$status" \
                | grep ocf::fuel | grep -v Started >> "$LOG_FILE"; then
            # No ocf::fuel line without "Started": everything is up.
            return 0
        fi
        # The resources that are still pending were logged by the grep above.
        echo "$(date): wait for resources to start." >> "$LOG_FILE"
        sleep "$interval"
        remain_time=$((remain_time - interval))
    done

    echo "Error: $(date): timeout for waiting resources to start." >> "$LOG_FILE"
    echo "Error: $(date): timeout for waiting resources to start." >&2
    exit 1
}
function mod_ceilometer {
# modify ceilometer configuration per need.
if pcs resource show p_ceilometer-agent-central >/dev/null 2>&1; then
# wait until all ocf resources are started, otherwise there is risk for race
# condition: If run "pcs resource restart" while some resources are still in
# restarting or initiating stage, it may result into failures for both.
wait_ocf_resource_started
# exclude network.services.* to avoid NotFound: 404 service not found error.
sed -i '/- "!storage.api.request"/a\ - "!network.services.*"' \
/etc/ceilometer/pipeline.yaml>>$LOG_FILE 2>&1

View File

@ -28,4 +28,4 @@
type: shell
parameters:
cmd: ./controller_post_deployment.sh
timeout: 300
timeout: 600