Make the workgroup status more robust
The current charm does not indicate to the end user when a specific resource is not running. Neither does it indicate when a node is offline or stopped. Validate that configured resources are actually running and let the end user know if they are not. Closes-Bug: #1834263 Change-Id: I1171e71ae3b015b4b838b7ecf0de18eb10d7c8f2
This commit is contained in:
parent
4d391e8107
commit
9364440075
|
@ -113,6 +113,37 @@ def crm_res_running(opt_name):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def crm_res_running_on_node(resource, node):
    """Determine if the resource is running on the given node.

    If the resource is active/passive check if it is running on any node.
    If the resource is active/active check it is running on the given node.

    The two cases are told apart by the shape of the output of
    ``crm resource status <resource>``: more than one line is treated as a
    clone set (one line per instance), a single line as an active/passive
    resource.

    :param resource: str name of resource
    :param node: str name of node
    :returns: boolean
    """
    (_, output) = subprocess.getstatusoutput(
        "crm resource status {}".format(resource))
    running_prefix = "resource {} is running".format(resource)
    lines = output.split("\n")

    if len(lines) > 1:
        # Multi line is a clone list like haproxy and should run on all nodes
        # check if it is running on this node
        for line in lines:
            if not line.startswith(running_prefix):
                continue
            # Compare whole words, not substrings: a plain ``node in line``
            # would report "node1" as running when only "node10" hosts the
            # resource. Everything after the prefix is the location part of
            # the line (e.g. " on: node1" or ": node1 Master").
            if node in line[len(running_prefix):].split():
                return True
        return False

    # Single line is for active/passive like a VIP, may not be on this node
    # but check it is running somewhere
    return output.startswith(running_prefix)
|
||||||
|
|
||||||
|
|
||||||
def list_nodes():
|
def list_nodes():
|
||||||
"""List member nodes."""
|
"""List member nodes."""
|
||||||
cmd = ['crm', 'node', 'status']
|
cmd = ['crm', 'node', 'status']
|
||||||
|
|
|
@ -1157,7 +1157,8 @@ def pause_unit():
|
||||||
if has_resources:
|
if has_resources:
|
||||||
messages.append("Resources still running on unit")
|
messages.append("Resources still running on unit")
|
||||||
status, message = assess_status_helper()
|
status, message = assess_status_helper()
|
||||||
if status != 'active':
|
# New status message will indicate the resource is not running
|
||||||
|
if status != 'active' and 'not running' not in message:
|
||||||
messages.append(message)
|
messages.append(message)
|
||||||
if messages and not is_unit_upgrading_set():
|
if messages and not is_unit_upgrading_set():
|
||||||
raise Exception("Couldn't pause: {}".format("; ".join(messages)))
|
raise Exception("Couldn't pause: {}".format("; ".join(messages)))
|
||||||
|
@ -1210,6 +1211,14 @@ def assess_status_helper():
|
||||||
status = 'maintenance'
|
status = 'maintenance'
|
||||||
message = 'Pacemaker in maintenance mode'
|
message = 'Pacemaker in maintenance mode'
|
||||||
|
|
||||||
|
for resource in get_resources().keys():
|
||||||
|
if not pcmk.is_resource_present(resource):
|
||||||
|
return ("waiting",
|
||||||
|
"Resource: {} not yet configured".format(resource))
|
||||||
|
if not pcmk.crm_res_running_on_node(resource, get_hostname()):
|
||||||
|
return ("blocked",
|
||||||
|
"Resource: {} not running".format(resource))
|
||||||
|
|
||||||
return status, message
|
return status, message
|
||||||
|
|
||||||
|
|
||||||
|
@ -1266,3 +1275,15 @@ def maintenance_mode(enable):
|
||||||
pcmk.set_property('maintenance-mode', str(enable).lower())
|
pcmk.set_property('maintenance-mode', str(enable).lower())
|
||||||
else:
|
else:
|
||||||
log('Desired value for maintenance-mode is already set', level=DEBUG)
|
log('Desired value for maintenance-mode is already set', level=DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
def get_resources():
    """Get resources from the HA relation

    Walks every unit on every "ha" relation and merges the 'resources'
    data each one publishes into a single dict.

    :returns: dict of resources (empty when nothing is published)
    """
    resources = {}
    for rid in relation_ids("ha"):
        for unit in related_units(rid):
            # Merge instead of rebinding: rebinding kept only the last unit's
            # data, so a unit that has not published anything yet would wipe
            # out what earlier units provided.
            # NOTE(review): assumes parse_data returns a dict or a falsy
            # value when the key is absent -- confirm against parse_data.
            resources.update(parse_data(rid, unit, 'resources') or {})
    return resources
|
||||||
|
|
|
@ -107,6 +107,50 @@ class TestPcmk(unittest.TestCase):
|
||||||
getstatusoutput.return_value = (1, "foobar")
|
getstatusoutput.return_value = (1, "foobar")
|
||||||
self.assertFalse(pcmk.crm_res_running('res_nova_consoleauth'))
|
self.assertFalse(pcmk.crm_res_running('res_nova_consoleauth'))
|
||||||
|
|
||||||
|
@mock.patch('subprocess.getstatusoutput')
def test_crm_res_running_on_node(self, getstatusoutput):
    """Check crm_res_running_on_node against canned crm status output.

    Each scenario stubs subprocess.getstatusoutput with a return code and
    an output string, then asserts the verdict for the local node.
    """
    res_name = "res_nova_consoleauth"
    local_node = "node1"
    remote_node = "node5"

    running = "resource {} is running: {}"
    clone_one_stopped = "resource {} is running: {}\nresource {} is NOT running"
    clone_all_running = "resource {} is running: {}\nresource {} is running: {}"

    # (return code, crm output, expected verdict)
    scenarios = (
        # Not running
        (1, "foobar", False),
        # Running active/passive on some other node
        (0, running.format(res_name, remote_node), True),
        # Running active/passive on this node
        (0, running.format(res_name, local_node), True),
        # Running on some but not this node
        (0, clone_one_stopped.format(res_name, remote_node, res_name),
         False),
        # Running on this node and not others
        (0, clone_one_stopped.format(res_name, local_node, res_name),
         True),
        # Running on more than one and this node
        (0, clone_all_running.format(
            res_name, remote_node, res_name, local_node), True),
    )

    for return_code, crm_output, expected in scenarios:
        getstatusoutput.return_value = (return_code, crm_output)
        verdict = pcmk.crm_res_running_on_node(res_name, local_node)
        if expected:
            self.assertTrue(verdict)
        else:
            self.assertFalse(verdict)
|
||||||
|
|
||||||
@mock.patch('socket.gethostname')
|
@mock.patch('socket.gethostname')
|
||||||
@mock.patch('subprocess.getstatusoutput')
|
@mock.patch('subprocess.getstatusoutput')
|
||||||
def test_wait_for_pcmk(self, getstatusoutput, gethostname):
|
def test_wait_for_pcmk(self, getstatusoutput, gethostname):
|
||||||
|
|
Loading…
Reference in New Issue