After entering standby mode, give resources time to be migrated

Retry up to 3 times, waiting 5 seconds between attempts, to let Pacemaker
finish migrating resources off the node before failing.

Change-Id: Ia25489e7e702fe26cb3ee7d96c4cf2e53ead8a96
Closes-Bug: 1794992
This commit is contained in:
Felipe Reyes 2018-09-28 11:15:07 -03:00
parent 598ecc81b4
commit 863bc4d05b
1 changed files with 19 additions and 1 deletions

View File

@ -24,6 +24,7 @@ import subprocess
import socket
import fcntl
import struct
import time
import xml.etree.ElementTree as ET
from base64 import b64decode
@ -107,6 +108,8 @@ COROSYNC_CONF_FILES = [
COROSYNC_HACLUSTER_ACL,
]
SUPPORTED_TRANSPORTS = ['udp', 'udpu', 'multicast', 'unicast']
PCMKR_MAX_RETRIES = 3
PCMKR_SLEEP_SECS = 5
SYSTEMD_OVERRIDES_DIR = '/etc/systemd/system/{}.service.d'
SYSTEMD_OVERRIDES_FILE = '{}/overrides.conf'
@ -845,7 +848,22 @@ def pause_unit():
enter_standby_mode(node_name)
if not is_in_standby_mode(node_name):
messages.append("Node not in standby mode")
if node_has_resources(node_name):
# Some resources may take a while to migrate off the node, so check up to
# PCMKR_MAX_RETRIES times, sleeping PCMKR_SLEEP_SECS seconds after each miss.
i = 0
ready = False
has_resources = False
while i < PCMKR_MAX_RETRIES and not ready:
if node_has_resources(node_name):
has_resources = True
i += 1
time.sleep(PCMKR_SLEEP_SECS)
else:
ready = True
has_resources = False
if has_resources:
messages.append("Resources still running on unit")
status, message = assess_status_helper()
if status != 'active':