From 77897d0ca5ea03168c882fb6aab29fad78d33764 Mon Sep 17 00:00:00 2001 From: Jianghua Wang Date: Thu, 13 Oct 2016 18:36:38 +0800 Subject: [PATCH] Fix race condition for pcs If "pcs resource restart" is run while some resources are still restarting or initializing, both operations may fail. So change the code to wait until all ocf resources are started before invoking "pcs resource restart" to restart p_ceilometer-agent-central. Change-Id: Ief3557462ed8a017bd4cc69336181147ed61244d --- .../controller_post_deployment.sh | 24 +++++++++++++++++++ plugin_source/deployment_tasks.yaml | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/plugin_source/deployment_scripts/controller_post_deployment.sh b/plugin_source/deployment_scripts/controller_post_deployment.sh index 257ad34..43b2ab9 100755 --- a/plugin_source/deployment_scripts/controller_post_deployment.sh +++ b/plugin_source/deployment_scripts/controller_post_deployment.sh @@ -50,9 +50,33 @@ EOF service nova-consoleauth restart } +function wait_ocf_resource_started { + # wait up to $TIMEOUT seconds until all ocf resources are started + TIMEOUT=300 + INTERVAL=10 + remain_time=$TIMEOUT + while [ ${remain_time} -gt 0 ]; do + if pcs resource show | grep ocf::fuel | grep -v Started >> $LOG_FILE; then + echo "$(date): wait for resources to start." >> $LOG_FILE + sleep $INTERVAL + remain_time=$((remain_time - $INTERVAL)) + else + return 0 + fi + done + echo "Error: $(date): timeout for waiting resources to start." >> $LOG_FILE + echo "Error: $(date): timeout for waiting resources to start." >&2 + exit 1 +} + function mod_ceilometer { # modify ceilometer configuration per need. if pcs resource show p_ceilometer-agent-central >/dev/null 2>&1; then + # wait until all ocf resources are started, otherwise there is a risk of a race + # condition: if "pcs resource restart" is run while some resources are still + # restarting or initializing, both operations may fail.
+ wait_ocf_resource_started + # exclude network.services.* to avoid NotFound: 404 service not found error. sed -i '/- "!storage.api.request"/a\ - "!network.services.*"' \ /etc/ceilometer/pipeline.yaml>>$LOG_FILE 2>&1 diff --git a/plugin_source/deployment_tasks.yaml b/plugin_source/deployment_tasks.yaml index c259120..41f29ae 100644 --- a/plugin_source/deployment_tasks.yaml +++ b/plugin_source/deployment_tasks.yaml @@ -28,4 +28,4 @@ type: shell parameters: cmd: ./controller_post_deployment.sh - timeout: 300 \ No newline at end of file + timeout: 600