From 7ddf9806c54e227921fd66643eaa570d45cecd81 Mon Sep 17 00:00:00 2001 From: Chris Friesen Date: Tue, 9 Apr 2019 17:26:12 -0400 Subject: [PATCH] Add dbmon timeouts to handle swact scenario It turns out that when swacting we can end up with kubernetes going down for a while, causing kubectl commands to hang. Accordingly, let's add some timeouts to critical commands to limit how long they can hang for. Change-Id: I777895497300cc605762db002958a778cd204e49 Story: 2004712 Task: 30410 Signed-off-by: Chris Friesen --- openstack/stx-ocf-scripts/src/ocf/dbmon | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/openstack/stx-ocf-scripts/src/ocf/dbmon b/openstack/stx-ocf-scripts/src/ocf/dbmon index 0a95a07d..b59c9296 100644 --- a/openstack/stx-ocf-scripts/src/ocf/dbmon +++ b/openstack/stx-ocf-scripts/src/ocf/dbmon @@ -105,11 +105,13 @@ check_has_garbd_chart() { debuginfo() { # Log some information on what's preventing us from getting the DB status + # The "timeout" call is in case we're in the middle of swacting and kubectl + # isn't responding, in which case the audit should catch any issues. APP_STATUS='uninstalled' - # Check whether kubectl is working - kubectl get node ${HOSTNAME} &> /dev/null + # Check whether kubectl is working. + timeout -k 5 5 kubectl get node ${HOSTNAME} &> /dev/null if [ $? -ne 0 ]; then ocf_log info "kubectl isn't working." STATUS="Primary" @@ -168,8 +170,11 @@ get_status() { get_pod_and_status() { - # Get name of local mariadb pod - PODNAME=`kubectl -n openstack get pod --field-selector spec.nodeName=${HOSTNAME} \ + # Get name of local mariadb pod. + # The "timeout" call is in case we're in the middle of swacting and kubectl + # isn't responding, in which case the audit should catch any issues. + + PODNAME=`timeout -k 5 5 kubectl -n openstack get pod --field-selector spec.nodeName=${HOSTNAME} \ -l application=mariadb,component=server -o=jsonpath='{.items[0].metadata.name'}` if [ $? -ne 0 ]; then ocf_log info "Error getting mariadb server pod name on this node."