Adjust quorum after node removal
Add an `update-ring` action for that purpose. Also print more on various pacemaker failures. Also removed some dead code. Func-Test-PR: https://github.com/openstack-charmers/zaza-openstack-tests/pull/369 Change-Id: I35c0c9ce67fd459b9c3099346705d43d76bbdfe4 Closes-Bug: #1400481 Related-Bug: #1874719 Co-Authored-By: Aurelien Lourot <aurelien.lourot@canonical.com> Co-Authored-By: Felipe Reyes <felipe.reyes@canonical.com>
This commit is contained in:
parent
6e1f20040c
commit
457f88eda6
31
README.md
31
README.md
|
@ -67,14 +67,37 @@ in a container on existing machines 0, 1, and 2:
|
||||||
This section lists Juju [actions][juju-docs-actions] supported by the charm.
|
This section lists Juju [actions][juju-docs-actions] supported by the charm.
|
||||||
Actions allow specific operations to be performed on a per-unit basis.
|
Actions allow specific operations to be performed on a per-unit basis.
|
||||||
|
|
||||||
* `pause`
|
* `pause`
|
||||||
* `resume`
|
* `resume`
|
||||||
* `status`
|
* `status`
|
||||||
* `cleanup`
|
* `cleanup`
|
||||||
|
* `update-ring`
|
||||||
|
|
||||||
To display action descriptions run `juju actions hacluster`. If the charm is
|
To display action descriptions run `juju actions hacluster`. If the charm is
|
||||||
not deployed then see file ``actions.yaml``.
|
not deployed then see file ``actions.yaml``.
|
||||||
|
|
||||||
|
### update-ring action
|
||||||
|
|
||||||
|
The `update-ring` action requires a parameter (`i-really-mean-it=true`) to make
|
||||||
|
sure tidying up the list of available corosync nodes in the ring is intended.
|
||||||
|
|
||||||
|
The operation expects:
|
||||||
|
|
||||||
|
1. `juju run-action hacluster/N pause --wait`. This will make sure no Pacemaker
|
||||||
|
resources run on the unit.
|
||||||
|
|
||||||
|
2. `juju remove-unit principal-unit/N`. Iterate through this step as many times
|
||||||
|
as there are units to remove (e.g. to scale back from 6 to 3 units).
|
||||||
|
|
||||||
|
3. `juju run-action hacluster/leader update-ring i-really-mean-it=true --wait`.
|
||||||
|
This step will remove corosync nodes from the ring and update corosync.conf
|
||||||
|
to list an updated number of nodes (min_quorum is recalculated).
|
||||||
|
|
||||||
|
If a unit goes into a lost state (e.g. because of a hardware failure), the
|
||||||
|
initial step (pause a unit) can be skipped. Unit removal may also be replaced
|
||||||
|
by `juju remove-machine N --force`, where N is the Juju machine ID where the
|
||||||
|
unit to be removed runs.
|
||||||
|
|
||||||
# Bugs
|
# Bugs
|
||||||
|
|
||||||
Please report bugs on [Launchpad][lp-bugs-charm-hacluster].
|
Please report bugs on [Launchpad][lp-bugs-charm-hacluster].
|
||||||
|
|
|
@ -12,3 +12,12 @@ cleanup:
|
||||||
default: "all"
|
default: "all"
|
||||||
type: string
|
type: string
|
||||||
description: Resource name to cleanup
|
description: Resource name to cleanup
|
||||||
|
update-ring:
|
||||||
|
description: Trigger corosync node members cleanup
|
||||||
|
params:
|
||||||
|
i-really-mean-it:
|
||||||
|
type: boolean
|
||||||
|
description: |
|
||||||
|
This must be toggled to enable actually performing this action
|
||||||
|
required:
|
||||||
|
- i-really-mean-it
|
||||||
|
|
|
@ -18,6 +18,7 @@ import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
import uuid
|
||||||
|
|
||||||
sys.path.append('hooks/')
|
sys.path.append('hooks/')
|
||||||
|
|
||||||
|
@ -39,13 +40,21 @@ from charmhelpers.core.hookenv import (
|
||||||
action_fail,
|
action_fail,
|
||||||
action_get,
|
action_get,
|
||||||
action_set,
|
action_set,
|
||||||
|
is_leader,
|
||||||
log,
|
log,
|
||||||
|
relation_ids,
|
||||||
|
relation_set,
|
||||||
)
|
)
|
||||||
from utils import (
|
from utils import (
|
||||||
|
emit_corosync_conf,
|
||||||
|
is_update_ring_requested,
|
||||||
pause_unit,
|
pause_unit,
|
||||||
resume_unit,
|
resume_unit,
|
||||||
|
update_node_list,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
import pcmk
|
||||||
|
|
||||||
|
|
||||||
def pause(args):
|
def pause(args):
|
||||||
"""Pause the hacluster services.
|
"""Pause the hacluster services.
|
||||||
|
@ -98,8 +107,46 @@ def cleanup(args):
|
||||||
"'{}'".format(resource_name))
|
"'{}'".format(resource_name))
|
||||||
|
|
||||||
|
|
||||||
|
def update_ring(args):
|
||||||
|
"""Update corosync.conf list of nodes (generally after unit removal)."""
|
||||||
|
if not action_get('i-really-mean-it'):
|
||||||
|
action_fail('i-really-mean-it is a required parameter')
|
||||||
|
return
|
||||||
|
|
||||||
|
if not is_leader():
|
||||||
|
action_fail('only the Juju leader can run this action')
|
||||||
|
return
|
||||||
|
|
||||||
|
diff_nodes = update_node_list()
|
||||||
|
if not diff_nodes:
|
||||||
|
# No differences between discovered Pacemaker nodes and
|
||||||
|
# Juju nodes (ie. no node removal)
|
||||||
|
action_set({'result': 'noop'})
|
||||||
|
return
|
||||||
|
|
||||||
|
# Trigger emit_corosync_conf() and corosync-cfgtool -R
|
||||||
|
# for all the hanode peer units to run
|
||||||
|
relid = relation_ids('hanode')
|
||||||
|
if len(relid) < 1:
|
||||||
|
action_fail('no peer ha nodes')
|
||||||
|
return
|
||||||
|
corosync_update_uuid = uuid.uuid1().hex
|
||||||
|
reldata = {'trigger-corosync-update': corosync_update_uuid}
|
||||||
|
relation_set(relation_id=relid[0],
|
||||||
|
relation_settings=reldata)
|
||||||
|
|
||||||
|
# Trigger the same logic in the leader (no hanode-relation-changed
|
||||||
|
# hook will be received by self)
|
||||||
|
if (is_update_ring_requested(corosync_update_uuid) and
|
||||||
|
emit_corosync_conf()):
|
||||||
|
cmd = 'corosync-cfgtool -R'
|
||||||
|
pcmk.commit(cmd)
|
||||||
|
action_set({'result': 'success'})
|
||||||
|
|
||||||
|
|
||||||
ACTIONS = {"pause": pause, "resume": resume,
|
ACTIONS = {"pause": pause, "resume": resume,
|
||||||
"status": status, "cleanup": cleanup}
|
"status": status, "cleanup": cleanup,
|
||||||
|
"update-ring": update_ring}
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
actions.py
|
|
@ -71,8 +71,8 @@ options:
|
||||||
description: |
|
description: |
|
||||||
When enabled pacemaker will be put in maintenance mode, this will allow
|
When enabled pacemaker will be put in maintenance mode, this will allow
|
||||||
administrators to manipulate cluster resources (e.g. stop daemons, reboot
|
administrators to manipulate cluster resources (e.g. stop daemons, reboot
|
||||||
machines, etc). Pacemaker will not monitor the resources while maintence
|
machines, etc). Pacemaker will not monitor the resources while maintenance
|
||||||
mode is enabled.
|
mode is enabled and node removals won't be processed.
|
||||||
service_start_timeout:
|
service_start_timeout:
|
||||||
type: int
|
type: int
|
||||||
default: 180
|
default: 180
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
hooks.py
|
|
@ -45,6 +45,8 @@ from charmhelpers.core.hookenv import (
|
||||||
related_units,
|
related_units,
|
||||||
relation_ids,
|
relation_ids,
|
||||||
relation_set,
|
relation_set,
|
||||||
|
remote_unit,
|
||||||
|
principal_unit,
|
||||||
config,
|
config,
|
||||||
Hooks,
|
Hooks,
|
||||||
UnregisteredHookError,
|
UnregisteredHookError,
|
||||||
|
@ -122,6 +124,8 @@ from utils import (
|
||||||
disable_stonith,
|
disable_stonith,
|
||||||
is_stonith_configured,
|
is_stonith_configured,
|
||||||
emit_systemd_overrides_file,
|
emit_systemd_overrides_file,
|
||||||
|
trigger_corosync_update_from_leader,
|
||||||
|
emit_corosync_conf,
|
||||||
)
|
)
|
||||||
|
|
||||||
from charmhelpers.contrib.charmsupport import nrpe
|
from charmhelpers.contrib.charmsupport import nrpe
|
||||||
|
@ -293,6 +297,22 @@ def hanode_relation_changed(relid=None):
|
||||||
ha_relation_changed()
|
ha_relation_changed()
|
||||||
|
|
||||||
|
|
||||||
|
@hooks.hook('hanode-relation-departed')
def hanode_relation_departed(relid=None):
    """Regenerate corosync.conf when a hanode peer departs.

    Nothing is done while the ``maintenance-mode`` option is set.

    :param relid: Relation id (not used by this hook).
    """
    if config('maintenance-mode'):
        log('pcmk is in maintenance mode - skip any action', DEBUG)
        return

    # Note(aluria): all units will update corosync.conf list of nodes
    # in the aim of having up to date stored configurations. However,
    # corosync reloads (or restarts) won't be triggered at this point
    # (update-ring action will do)
    conf_updated = emit_corosync_conf()
    log('corosync.conf updated' if conf_updated
        else 'corosync.conf not updated')
|
||||||
|
|
||||||
|
|
||||||
@hooks.hook('ha-relation-joined',
|
@hooks.hook('ha-relation-joined',
|
||||||
'ha-relation-changed',
|
'ha-relation-changed',
|
||||||
'peer-availability-relation-joined',
|
'peer-availability-relation-joined',
|
||||||
|
@ -306,9 +326,22 @@ def ha_relation_changed():
|
||||||
level=INFO)
|
level=INFO)
|
||||||
return
|
return
|
||||||
|
|
||||||
if relation_ids('hanode'):
|
relid_hanode = relation_ids('hanode')
|
||||||
|
if relid_hanode:
|
||||||
log('Ready to form cluster - informing peers', level=DEBUG)
|
log('Ready to form cluster - informing peers', level=DEBUG)
|
||||||
relation_set(relation_id=relation_ids('hanode')[0], ready=True)
|
relation_set(relation_id=relid_hanode[0], ready=True)
|
||||||
|
|
||||||
|
# If a trigger-corosync-update attribute exists in the relation,
|
||||||
|
# the Juju leader may have requested all its peers to update
|
||||||
|
# the corosync.conf list of nodes. If it's the case, no other
|
||||||
|
# action will be run (a future hook re: ready=True may trigger
|
||||||
|
# other logic)
|
||||||
|
if (remote_unit() != principal_unit() and
|
||||||
|
trigger_corosync_update_from_leader(
|
||||||
|
remote_unit(), relid_hanode[0]
|
||||||
|
)):
|
||||||
|
return
|
||||||
|
|
||||||
else:
|
else:
|
||||||
log('Ready to form cluster, but not related to peers just yet',
|
log('Ready to form cluster, but not related to peers just yet',
|
||||||
level=INFO)
|
level=INFO)
|
||||||
|
@ -563,8 +596,12 @@ def ha_relation_changed():
|
||||||
|
|
||||||
@hooks.hook()
|
@hooks.hook()
|
||||||
def stop():
|
def stop():
|
||||||
cmd = 'crm -w -F node delete %s' % socket.gethostname()
|
# NOTE(lourot): This seems to always fail with
|
||||||
pcmk.commit(cmd)
|
# 'ERROR: node <node_name> not found in the CIB', which means that the node
|
||||||
|
# has already been removed from the cluster. Thus failure_is_fatal=False.
|
||||||
|
# We might consider getting rid of this call.
|
||||||
|
pcmk.delete_node(socket.gethostname(), failure_is_fatal=False)
|
||||||
|
|
||||||
apt_purge(['corosync', 'pacemaker'], fatal=True)
|
apt_purge(['corosync', 'pacemaker'], fatal=True)
|
||||||
|
|
||||||
|
|
||||||
|
|
110
hooks/pcmk.py
110
hooks/pcmk.py
|
@ -40,18 +40,41 @@ class PropertyNotFound(Exception):
|
||||||
|
|
||||||
|
|
||||||
def wait_for_pcmk(retries=12, sleep=10):
|
def wait_for_pcmk(retries=12, sleep=10):
|
||||||
crm_up = None
|
"""Wait for pacemaker/corosync to fully come up.
|
||||||
hostname = socket.gethostname()
|
|
||||||
|
:param retries: Number of times to check for crm's output before raising.
|
||||||
|
:type retries: int
|
||||||
|
:param sleep: Number of seconds to sleep between retries.
|
||||||
|
:type sleep: int
|
||||||
|
:raises: ServicesNotUp
|
||||||
|
"""
|
||||||
|
expected_hostname = socket.gethostname()
|
||||||
|
last_exit_code = None
|
||||||
|
last_output = None
|
||||||
for i in range(retries):
|
for i in range(retries):
|
||||||
if crm_up:
|
if i > 0:
|
||||||
return True
|
|
||||||
output = subprocess.getstatusoutput("crm node list")[1]
|
|
||||||
crm_up = hostname in output
|
|
||||||
time.sleep(sleep)
|
time.sleep(sleep)
|
||||||
if not crm_up:
|
last_exit_code, last_output = subprocess.getstatusoutput(
|
||||||
raise ServicesNotUp("Pacemaker or Corosync are still down after "
|
'crm node list')
|
||||||
"waiting for {} retries. Last output: {}"
|
if expected_hostname in last_output:
|
||||||
"".format(retries, output))
|
return
|
||||||
|
|
||||||
|
msg = ('Pacemaker or Corosync are still not fully up after waiting for '
|
||||||
|
'{} retries. '.format(retries))
|
||||||
|
if last_exit_code != 0:
|
||||||
|
msg += 'Last exit code: {}. '.format(last_exit_code)
|
||||||
|
if 'not supported between' in last_output:
|
||||||
|
# NOTE(lourot): transient crmsh bug
|
||||||
|
# https://github.com/ClusterLabs/crmsh/issues/764
|
||||||
|
msg += 'This looks like ClusterLabs/crmsh#764. '
|
||||||
|
elif 'node1' in last_output:
|
||||||
|
# NOTE(lourot): transient bug on deployment. The charm will recover
|
||||||
|
# later but the corosync ring will still show an offline 'node1' node.
|
||||||
|
# The corosync ring can then be cleaned up by running the 'update-ring'
|
||||||
|
# action.
|
||||||
|
msg += 'This looks like lp:1874719. '
|
||||||
|
msg += 'Last output: {}'.format(last_output)
|
||||||
|
raise ServicesNotUp(msg)
|
||||||
|
|
||||||
|
|
||||||
def commit(cmd, failure_is_fatal=False):
|
def commit(cmd, failure_is_fatal=False):
|
||||||
|
@ -64,7 +87,7 @@ def commit(cmd, failure_is_fatal=False):
|
||||||
:raises: subprocess.CalledProcessError
|
:raises: subprocess.CalledProcessError
|
||||||
"""
|
"""
|
||||||
if failure_is_fatal:
|
if failure_is_fatal:
|
||||||
return subprocess.check_call(cmd.split())
|
return subprocess.check_output(cmd.split(), stderr=subprocess.STDOUT)
|
||||||
else:
|
else:
|
||||||
return subprocess.call(cmd.split())
|
return subprocess.call(cmd.split())
|
||||||
|
|
||||||
|
@ -77,24 +100,6 @@ def is_resource_present(resource):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def standby(node=None):
|
|
||||||
if node is None:
|
|
||||||
cmd = "crm -F node standby"
|
|
||||||
else:
|
|
||||||
cmd = "crm -F node standby %s" % node
|
|
||||||
|
|
||||||
commit(cmd)
|
|
||||||
|
|
||||||
|
|
||||||
def online(node=None):
|
|
||||||
if node is None:
|
|
||||||
cmd = "crm -F node online"
|
|
||||||
else:
|
|
||||||
cmd = "crm -F node online %s" % node
|
|
||||||
|
|
||||||
commit(cmd)
|
|
||||||
|
|
||||||
|
|
||||||
def crm_opt_exists(opt_name):
|
def crm_opt_exists(opt_name):
|
||||||
output = subprocess.getstatusoutput("crm configure show")[1]
|
output = subprocess.getstatusoutput("crm configure show")[1]
|
||||||
if opt_name in output:
|
if opt_name in output:
|
||||||
|
@ -168,6 +173,53 @@ def list_nodes():
|
||||||
return sorted(nodes)
|
return sorted(nodes)
|
||||||
|
|
||||||
|
|
||||||
|
def set_node_status_to_maintenance(node_name):
    """Put a cluster node into maintenance mode.

    See https://crmsh.github.io/man-2.0/#cmdhelp_node_maintenance

    :param node_name: Name of the node to set to maintenance.
    :type node_name: str
    :raises: subprocess.CalledProcessError
    """
    log('Setting node {} to maintenance'.format(node_name))
    maintenance_cmd = 'crm -w -F node maintenance {}'.format(node_name)
    # A failure here is always fatal so the caller gets the
    # CalledProcessError instead of a silently ignored exit code.
    commit(maintenance_cmd, failure_is_fatal=True)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_node(node_name, failure_is_fatal=True):
    """Remove a node from the cluster.

    See https://crmsh.github.io/man-2.0/#cmdhelp_node_delete

    The command is attempted at most three times. A retry only happens when
    the failure looks like https://github.com/ClusterLabs/crmsh/issues/283.
    A node that is already absent from the CIB is treated as success.

    :param node_name: Name of the node to be removed from the cluster.
    :type node_name: str
    :param failure_is_fatal: Whether to raise exception if command fails.
    :type failure_is_fatal: bool
    :raises: subprocess.CalledProcessError
    """
    log('Deleting node {} from the cluster'.format(node_name))
    cmd = 'crm -w -F node delete {}'.format(node_name)
    for attempt in [2, 1, 0]:
        try:
            commit(cmd, failure_is_fatal=failure_is_fatal)
        except subprocess.CalledProcessError as e:
            output = (e.output or b'').decode('utf-8').strip()
            log('"{}" failed with "{}"'.format(cmd, output), WARNING)
            if output == 'ERROR: node {} not found in the CIB'.format(
                    node_name):
                # NOTE(lourot): Sometimes seen when called from the
                # `update-ring` action.
                log('{} was already removed from the cluster, moving on'
                    .format(node_name), WARNING)
                return
            if '/cmdline' in output:
                # NOTE(lourot): older versions of crmsh may fail with
                # https://github.com/ClusterLabs/crmsh/issues/283 . If that's
                # the case let's retry.
                log('This looks like ClusterLabs/crmsh#283.', WARNING)
                if attempt > 0:
                    log('Retrying...', WARNING)
                    continue
            raise
        else:
            # The command did not raise: stop here instead of running the
            # deletion again on the remaining attempts.
            return
|
||||||
|
|
||||||
|
|
||||||
def get_property_from_xml(name, output):
|
def get_property_from_xml(name, output):
|
||||||
"""Read a configuration property from the XML generated by 'crm configure show
|
"""Read a configuration property from the XML generated by 'crm configure show
|
||||||
xml'
|
xml'
|
||||||
|
|
145
hooks/utils.py
145
hooks/utils.py
|
@ -118,6 +118,27 @@ class MAASConfigIncomplete(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class RemoveCorosyncNodeFailed(Exception):
    """Raised when removing a node from the cluster failed."""

    def __init__(self, node_name, called_process_error):
        """Build the message from the node name and the failed command.

        :param node_name: Name of the node that could not be removed.
        :param called_process_error: Error raised by the failed command; its
                                     ``output`` attribute is included in the
                                     message.
        """
        failure_details = '{} output={}'.format(
            called_process_error, called_process_error.output)
        super().__init__(
            'Removing {} from the cluster failed. {}'.format(
                node_name, failure_details))
|
||||||
|
|
||||||
|
|
||||||
|
class EnableStonithFailed(Exception):
    """Raised when enabling STONITH failed."""

    def __init__(self, called_process_error):
        """Build the message from the failed command.

        :param called_process_error: Error raised by the failed command; its
                                     ``output`` attribute is included in the
                                     message.
        """
        failure_details = '{} output={}'.format(
            called_process_error, called_process_error.output)
        super().__init__(
            'Enabling STONITH failed. {}'.format(failure_details))
|
||||||
|
|
||||||
|
|
||||||
|
class DisableStonithFailed(Exception):
    """Raised when disabling STONITH failed."""

    def __init__(self, called_process_error):
        """Build the message from the failed command.

        :param called_process_error: Error raised by the failed command; its
                                     ``output`` attribute is included in the
                                     message.
        """
        failure_details = '{} output={}'.format(
            called_process_error, called_process_error.output)
        super().__init__(
            'Disabling STONITH failed. {}'.format(failure_details))
|
||||||
|
|
||||||
|
|
||||||
def disable_upstart_services(*services):
|
def disable_upstart_services(*services):
|
||||||
for service in services:
|
for service in services:
|
||||||
with open("/etc/init/{}.override".format(service), "wt") as override:
|
with open("/etc/init/{}.override".format(service), "wt") as override:
|
||||||
|
@ -516,9 +537,13 @@ def configure_stonith():
|
||||||
enable_stonith()
|
enable_stonith()
|
||||||
set_stonith_configured(True)
|
set_stonith_configured(True)
|
||||||
else:
|
else:
|
||||||
log('Disabling STONITH', level=INFO)
|
# NOTE(lourot): We enter here when no MAAS STONITH resource could be
|
||||||
cmd = "crm configure property stonith-enabled=false"
|
# created. Disabling STONITH for now. We're not calling
|
||||||
pcmk.commit(cmd)
|
# set_stonith_configured(), so that enabling STONITH will be retried
|
||||||
|
# later. (STONITH is now always enabled in this charm.)
|
||||||
|
# Without MAAS, we keep entering here, which isn't really an issue,
|
||||||
|
# except that this fails in rare cases, thus failure_is_fatal=False.
|
||||||
|
disable_stonith(failure_is_fatal=False)
|
||||||
|
|
||||||
|
|
||||||
def configure_monitor_host():
|
def configure_monitor_host():
|
||||||
|
@ -661,17 +686,33 @@ def configure_maas_stonith_resource(stonith_hostnames):
|
||||||
|
|
||||||
|
|
||||||
def enable_stonith():
|
def enable_stonith():
|
||||||
"""Enable stonith via the global property stonith-enabled."""
|
"""Enable stonith via the global property stonith-enabled.
|
||||||
|
|
||||||
|
:raises: EnableStonithFailed
|
||||||
|
"""
|
||||||
|
log('Enabling STONITH', level=INFO)
|
||||||
|
try:
|
||||||
pcmk.commit(
|
pcmk.commit(
|
||||||
"crm configure property stonith-enabled=true",
|
"crm configure property stonith-enabled=true",
|
||||||
failure_is_fatal=True)
|
failure_is_fatal=True)
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
raise EnableStonithFailed(e)
|
||||||
|
|
||||||
|
|
||||||
def disable_stonith():
|
def disable_stonith(failure_is_fatal=True):
|
||||||
"""Disable stonith via the global property stonith-enabled."""
|
"""Disable stonith via the global property stonith-enabled.
|
||||||
|
|
||||||
|
:param failure_is_fatal: Whether to raise exception if command fails.
|
||||||
|
:type failure_is_fatal: bool
|
||||||
|
:raises: DisableStonithFailed
|
||||||
|
"""
|
||||||
|
log('Disabling STONITH', level=INFO)
|
||||||
|
try:
|
||||||
pcmk.commit(
|
pcmk.commit(
|
||||||
"crm configure property stonith-enabled=false",
|
"crm configure property stonith-enabled=false",
|
||||||
failure_is_fatal=True)
|
failure_is_fatal=failure_is_fatal)
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
raise DisableStonithFailed(e)
|
||||||
|
|
||||||
|
|
||||||
def get_ip_addr_from_resource_params(params):
|
def get_ip_addr_from_resource_params(params):
|
||||||
|
@ -950,13 +991,14 @@ def restart_corosync_on_change():
|
||||||
def wrap(f):
|
def wrap(f):
|
||||||
def wrapped_f(*args, **kwargs):
|
def wrapped_f(*args, **kwargs):
|
||||||
checksums = {}
|
checksums = {}
|
||||||
|
if not is_unit_paused_set():
|
||||||
for path in COROSYNC_CONF_FILES:
|
for path in COROSYNC_CONF_FILES:
|
||||||
checksums[path] = file_hash(path)
|
checksums[path] = file_hash(path)
|
||||||
return_data = f(*args, **kwargs)
|
return_data = f(*args, **kwargs)
|
||||||
# NOTE: this assumes that this call is always done around
|
# NOTE: this assumes that this call is always done around
|
||||||
# configure_corosync, which returns true if configuration
|
# configure_corosync, which returns true if configuration
|
||||||
# files where actually generated
|
# files where actually generated
|
||||||
if return_data:
|
if return_data and not is_unit_paused_set():
|
||||||
for path in COROSYNC_CONF_FILES:
|
for path in COROSYNC_CONF_FILES:
|
||||||
if checksums[path] != file_hash(path):
|
if checksums[path] != file_hash(path):
|
||||||
validated_restart_corosync()
|
validated_restart_corosync()
|
||||||
|
@ -974,11 +1016,12 @@ def try_pcmk_wait():
|
||||||
try:
|
try:
|
||||||
pcmk.wait_for_pcmk()
|
pcmk.wait_for_pcmk()
|
||||||
log("Pacemaker is ready", DEBUG)
|
log("Pacemaker is ready", DEBUG)
|
||||||
except pcmk.ServicesNotUp:
|
except pcmk.ServicesNotUp as e:
|
||||||
msg = ("Pacemaker is down. Please manually start it.")
|
status_msg = "Pacemaker is down. Please manually start it."
|
||||||
log(msg, ERROR)
|
status_set('blocked', status_msg)
|
||||||
status_set('blocked', msg)
|
full_msg = "{} {}".format(status_msg, e)
|
||||||
raise pcmk.ServicesNotUp(msg)
|
log(full_msg, ERROR)
|
||||||
|
raise pcmk.ServicesNotUp(full_msg)
|
||||||
|
|
||||||
|
|
||||||
@restart_corosync_on_change()
|
@restart_corosync_on_change()
|
||||||
|
@ -1003,9 +1046,10 @@ def services_running():
|
||||||
if not (pacemaker_status and corosync_status):
|
if not (pacemaker_status and corosync_status):
|
||||||
# OS perspective
|
# OS perspective
|
||||||
return False
|
return False
|
||||||
else:
|
# Functional test of pacemaker. This will raise if pacemaker doesn't get
|
||||||
# Functional test of pacemaker
|
# fully ready in time:
|
||||||
return pcmk.wait_for_pcmk()
|
pcmk.wait_for_pcmk()
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
def validated_restart_corosync(retries=10):
|
def validated_restart_corosync(retries=10):
|
||||||
|
@ -1184,6 +1228,20 @@ def node_has_resources(node_name):
|
||||||
return has_resources
|
return has_resources
|
||||||
|
|
||||||
|
|
||||||
|
def node_is_dc(node_name):
    """Check if this node is the designated controller.

    The answer is read from the XML printed by ``crm_mon -X``.

    @param node_name: The name of the node to check
    @returns boolean - True if node_name is the DC
    """
    crm_mon_xml = subprocess.check_output(['crm_mon', '-X']).decode('utf-8')
    cluster_state = ET.fromstring(crm_mon_xml)
    # True as soon as one <current_dc> element carries the given name.
    return any(
        dc_element.attrib.get('name') == node_name
        for dc_element in cluster_state.iter("current_dc")
    )
|
||||||
|
|
||||||
|
|
||||||
def set_unit_status():
|
def set_unit_status():
|
||||||
"""Set the workload status for this unit
|
"""Set the workload status for this unit
|
||||||
|
|
||||||
|
@ -1493,3 +1551,58 @@ def is_stonith_configured():
|
||||||
"""
|
"""
|
||||||
configured = leader_get(STONITH_CONFIGURED) or 'False'
|
configured = leader_get(STONITH_CONFIGURED) or 'False'
|
||||||
return bool_from_string(configured)
|
return bool_from_string(configured)
|
||||||
|
|
||||||
|
|
||||||
|
def update_node_list():
    """Delete from the cluster the Pacemaker nodes unknown to Juju.

    The nodes known to Juju are this host plus the hostname of every address
    returned by get_ha_nodes(). Each Pacemaker node outside of that set is
    put into maintenance and then deleted.

    :returns: Set of pcmk nodes not part of Juju hanode relation
    :rtype: Set[str]
    :raises: RemoveCorosyncNodeFailed
    """
    known_to_pcmk = set(pcmk.list_nodes())
    known_to_juju = {socket.gethostname()}
    known_to_juju.update(
        utils.get_hostname(peer_addr, fqdn=False)
        for _, peer_addr in get_ha_nodes().items()
    )

    stale_nodes = known_to_pcmk - known_to_juju
    log("pcmk_nodes[{}], juju_nodes[{}], diff[{}]"
        "".format(known_to_pcmk, known_to_juju, stale_nodes),
        DEBUG)

    for stale_node in stale_nodes:
        try:
            pcmk.set_node_status_to_maintenance(stale_node)
            pcmk.delete_node(stale_node)
        except subprocess.CalledProcessError as e:
            # Wrap the error so the failing node name is reported.
            raise RemoveCorosyncNodeFailed(stale_node, e)

    return stale_nodes
|
||||||
|
|
||||||
|
|
||||||
|
def is_update_ring_requested(corosync_update_uuid):
    """Tell whether the given update uuid is new to this unit.

    The last uuid is read from the unit's local kv store under the
    ``corosync-update-uuid`` key. When nothing is stored or the stored value
    differs, the given uuid is stored.

    :param corosync_update_uuid: uuid identifying a ring update request.
    :returns: True if the uuid was stored, False if it matched the stored one.
    """
    log("Setting corosync-update-uuid=<uuid> in local kv", DEBUG)
    with unitdata.HookData()() as hook_data:
        store = hook_data[0]
        previous_uuid = store.get('corosync-update-uuid')
        if previous_uuid and previous_uuid == corosync_update_uuid:
            # Same request as last time: nothing new to do.
            return False
        store.set('corosync-update-uuid', corosync_update_uuid)
        return True
|
||||||
|
|
||||||
|
|
||||||
|
def trigger_corosync_update_from_leader(unit, rid):
    """Apply a corosync update requested through relation data.

    Reads the ``trigger-corosync-update`` attribute set by ``unit`` on
    relation ``rid``. When it holds a uuid not handled yet and corosync.conf
    gets emitted, ``corosync-cfgtool -R`` is run.

    :param unit: Unit whose relation data is read.
    :param rid: Relation id to read from.
    :returns: True if ``corosync-cfgtool -R`` was run, False otherwise.
    """
    requested_uuid = relation_get(
        attribute='trigger-corosync-update',
        unit=unit, rid=rid,
    )
    # The checks run in this order and stop at the first failing one.
    if not requested_uuid:
        return False
    if not is_update_ring_requested(requested_uuid):
        return False
    if not emit_corosync_conf():
        return False

    pcmk.commit('corosync-cfgtool -R')
    return True
|
||||||
|
|
|
@ -23,7 +23,11 @@ configure:
|
||||||
|
|
||||||
tests:
|
tests:
|
||||||
- zaza.openstack.charm_tests.hacluster.tests.HaclusterTest
|
- zaza.openstack.charm_tests.hacluster.tests.HaclusterTest
|
||||||
|
- zaza.openstack.charm_tests.hacluster.tests.HaclusterScaleBackAndForthTest
|
||||||
|
|
||||||
tests_options:
|
tests_options:
|
||||||
|
hacluster:
|
||||||
|
principle-app-name: keystone
|
||||||
|
hacluster-charm-name: hacluster
|
||||||
force_deploy:
|
force_deploy:
|
||||||
- groovy-victoria
|
- groovy-victoria
|
||||||
|
|
|
@ -42,6 +42,9 @@ class TestCorosyncConf(unittest.TestCase):
|
||||||
os.remove(self.tmpfile.name)
|
os.remove(self.tmpfile.name)
|
||||||
|
|
||||||
@mock.patch.object(pcmk.unitdata, 'kv')
|
@mock.patch.object(pcmk.unitdata, 'kv')
|
||||||
|
@mock.patch.object(hooks, 'remote_unit')
|
||||||
|
@mock.patch.object(hooks, 'principal_unit')
|
||||||
|
@mock.patch.object(hooks, 'trigger_corosync_update_from_leader')
|
||||||
@mock.patch.object(hooks, 'is_stonith_configured')
|
@mock.patch.object(hooks, 'is_stonith_configured')
|
||||||
@mock.patch.object(hooks, 'configure_peer_stonith_resource')
|
@mock.patch.object(hooks, 'configure_peer_stonith_resource')
|
||||||
@mock.patch.object(hooks, 'get_member_ready_nodes')
|
@mock.patch.object(hooks, 'get_member_ready_nodes')
|
||||||
|
@ -78,7 +81,9 @@ class TestCorosyncConf(unittest.TestCase):
|
||||||
configure_resources_on_remotes,
|
configure_resources_on_remotes,
|
||||||
get_member_ready_nodes,
|
get_member_ready_nodes,
|
||||||
configure_peer_stonith_resource,
|
configure_peer_stonith_resource,
|
||||||
is_stonith_configured, mock_kv):
|
is_stonith_configured,
|
||||||
|
trigger_corosync_update_from_leader,
|
||||||
|
principal_unit, remote_unit, mock_kv):
|
||||||
|
|
||||||
def fake_crm_opt_exists(res_name):
|
def fake_crm_opt_exists(res_name):
|
||||||
# res_ubuntu will take the "update resource" route
|
# res_ubuntu will take the "update resource" route
|
||||||
|
@ -104,6 +109,8 @@ class TestCorosyncConf(unittest.TestCase):
|
||||||
'cluster_count': 3,
|
'cluster_count': 3,
|
||||||
'failure_timeout': 180,
|
'failure_timeout': 180,
|
||||||
'cluster_recheck_interval': 60}
|
'cluster_recheck_interval': 60}
|
||||||
|
trigger_corosync_update_from_leader.return_value = False
|
||||||
|
principal_unit.return_value = remote_unit.return_value = ""
|
||||||
|
|
||||||
config.side_effect = lambda key: cfg.get(key)
|
config.side_effect = lambda key: cfg.get(key)
|
||||||
|
|
||||||
|
@ -165,6 +172,9 @@ class TestCorosyncConf(unittest.TestCase):
|
||||||
commit.assert_any_call(
|
commit.assert_any_call(
|
||||||
'crm -w -F configure %s %s %s' % (kw, name, params))
|
'crm -w -F configure %s %s %s' % (kw, name, params))
|
||||||
|
|
||||||
|
@mock.patch.object(hooks, 'remote_unit')
|
||||||
|
@mock.patch.object(hooks, 'principal_unit')
|
||||||
|
@mock.patch.object(hooks, 'trigger_corosync_update_from_leader')
|
||||||
@mock.patch.object(hooks, 'is_stonith_configured')
|
@mock.patch.object(hooks, 'is_stonith_configured')
|
||||||
@mock.patch.object(hooks, 'configure_peer_stonith_resource')
|
@mock.patch.object(hooks, 'configure_peer_stonith_resource')
|
||||||
@mock.patch.object(hooks, 'get_member_ready_nodes')
|
@mock.patch.object(hooks, 'get_member_ready_nodes')
|
||||||
|
@ -200,7 +210,9 @@ class TestCorosyncConf(unittest.TestCase):
|
||||||
configure_pacemaker_remote_stonith_resource,
|
configure_pacemaker_remote_stonith_resource,
|
||||||
configure_resources_on_remotes, get_member_ready_nodes,
|
configure_resources_on_remotes, get_member_ready_nodes,
|
||||||
configure_peer_stonith_resource,
|
configure_peer_stonith_resource,
|
||||||
is_stonith_configured):
|
is_stonith_configured,
|
||||||
|
trigger_corosync_update_from_leader,
|
||||||
|
principal_unit, remote_unit):
|
||||||
is_stonith_configured.return_value = False
|
is_stonith_configured.return_value = False
|
||||||
validate_dns_ha.return_value = True
|
validate_dns_ha.return_value = True
|
||||||
crm_opt_exists.return_value = False
|
crm_opt_exists.return_value = False
|
||||||
|
@ -218,6 +230,8 @@ class TestCorosyncConf(unittest.TestCase):
|
||||||
'cluster_count': 3,
|
'cluster_count': 3,
|
||||||
'maas_url': 'http://maas/MAAAS/',
|
'maas_url': 'http://maas/MAAAS/',
|
||||||
'maas_credentials': 'secret'}
|
'maas_credentials': 'secret'}
|
||||||
|
trigger_corosync_update_from_leader.return_value = False
|
||||||
|
principal_unit.return_value = remote_unit.return_value = ""
|
||||||
|
|
||||||
config.side_effect = lambda key: cfg.get(key)
|
config.side_effect = lambda key: cfg.get(key)
|
||||||
|
|
||||||
|
@ -248,6 +262,9 @@ class TestCorosyncConf(unittest.TestCase):
|
||||||
'params bar ip_address="172.16.0.1" maas_url="http://maas/MAAAS/" '
|
'params bar ip_address="172.16.0.1" maas_url="http://maas/MAAAS/" '
|
||||||
'maas_credentials="secret"')
|
'maas_credentials="secret"')
|
||||||
|
|
||||||
|
@mock.patch.object(hooks, 'remote_unit')
|
||||||
|
@mock.patch.object(hooks, 'principal_unit')
|
||||||
|
@mock.patch.object(hooks, 'trigger_corosync_update_from_leader')
|
||||||
@mock.patch.object(hooks, 'setup_maas_api')
|
@mock.patch.object(hooks, 'setup_maas_api')
|
||||||
@mock.patch.object(hooks, 'validate_dns_ha')
|
@mock.patch.object(hooks, 'validate_dns_ha')
|
||||||
@mock.patch('pcmk.wait_for_pcmk')
|
@mock.patch('pcmk.wait_for_pcmk')
|
||||||
|
@ -270,7 +287,9 @@ class TestCorosyncConf(unittest.TestCase):
|
||||||
relation_set, get_cluster_nodes, related_units, configure_stonith,
|
relation_set, get_cluster_nodes, related_units, configure_stonith,
|
||||||
configure_monitor_host, configure_cluster_global,
|
configure_monitor_host, configure_cluster_global,
|
||||||
configure_corosync, is_leader, crm_opt_exists,
|
configure_corosync, is_leader, crm_opt_exists,
|
||||||
wait_for_pcmk, validate_dns_ha, setup_maas_api):
|
wait_for_pcmk, validate_dns_ha, setup_maas_api,
|
||||||
|
trigger_corosync_update_from_leader,
|
||||||
|
principal_unit, remote_unit):
|
||||||
|
|
||||||
def fake_validate():
|
def fake_validate():
|
||||||
raise utils.MAASConfigIncomplete('DNS HA invalid config')
|
raise utils.MAASConfigIncomplete('DNS HA invalid config')
|
||||||
|
@ -289,6 +308,8 @@ class TestCorosyncConf(unittest.TestCase):
|
||||||
'cluster_count': 3,
|
'cluster_count': 3,
|
||||||
'maas_url': 'http://maas/MAAAS/',
|
'maas_url': 'http://maas/MAAAS/',
|
||||||
'maas_credentials': None}
|
'maas_credentials': None}
|
||||||
|
trigger_corosync_update_from_leader.return_value = False
|
||||||
|
principal_unit.return_value = remote_unit.return_value = ""
|
||||||
|
|
||||||
config.side_effect = lambda key: cfg.get(key)
|
config.side_effect = lambda key: cfg.get(key)
|
||||||
|
|
||||||
|
@ -391,7 +412,6 @@ class TestHooks(test_utils.CharmTestCase):
|
||||||
mock_is_stonith_configured.return_value = False
|
mock_is_stonith_configured.return_value = False
|
||||||
mock_config.side_effect = self.test_config.get
|
mock_config.side_effect = self.test_config.get
|
||||||
mock_relation_ids.return_value = ['hanode:1']
|
mock_relation_ids.return_value = ['hanode:1']
|
||||||
mock_wait_for_pcmk.return_value = True
|
|
||||||
mock_is_leader.return_value = True
|
mock_is_leader.return_value = True
|
||||||
hooks.config_changed()
|
hooks.config_changed()
|
||||||
mock_maintenance_mode.assert_not_called()
|
mock_maintenance_mode.assert_not_called()
|
||||||
|
|
|
@ -93,6 +93,10 @@ class UtilsTestCaseWriteTmp(unittest.TestCase):
|
||||||
|
|
||||||
|
|
||||||
class UtilsTestCase(unittest.TestCase):
|
class UtilsTestCase(unittest.TestCase):
|
||||||
|
def _testdata(self, filename):
|
||||||
|
return os.path.join(os.path.dirname(__file__),
|
||||||
|
'testdata',
|
||||||
|
filename)
|
||||||
|
|
||||||
@mock.patch.object(utils, 'config')
|
@mock.patch.object(utils, 'config')
|
||||||
def test_get_transport(self, mock_config):
|
def test_get_transport(self, mock_config):
|
||||||
|
@ -430,20 +434,19 @@ class UtilsTestCase(unittest.TestCase):
|
||||||
])
|
])
|
||||||
|
|
||||||
@mock.patch('pcmk.commit')
|
@mock.patch('pcmk.commit')
|
||||||
@mock.patch.object(utils, 'config')
|
|
||||||
@mock.patch.object(utils, 'configure_pacemaker_remote_stonith_resource')
|
@mock.patch.object(utils, 'configure_pacemaker_remote_stonith_resource')
|
||||||
def test_configure_stonith_stonith_enabled_false(
|
def test_configure_stonith_no_maas(
|
||||||
self,
|
self,
|
||||||
mock_cfg_pcmkr_rstonith_res,
|
mock_cfg_pcmkr_rstonith_res,
|
||||||
mock_config,
|
|
||||||
mock_commit):
|
mock_commit):
|
||||||
cfg = {
|
# Without MAAS this function will return no resource:
|
||||||
'stonith_enabled': 'false'}
|
|
||||||
mock_config.side_effect = lambda key: cfg.get(key)
|
|
||||||
mock_cfg_pcmkr_rstonith_res.return_value = []
|
mock_cfg_pcmkr_rstonith_res.return_value = []
|
||||||
|
|
||||||
utils.configure_stonith()
|
utils.configure_stonith()
|
||||||
|
|
||||||
mock_commit.assert_called_once_with(
|
mock_commit.assert_called_once_with(
|
||||||
'crm configure property stonith-enabled=false')
|
'crm configure property stonith-enabled=false',
|
||||||
|
failure_is_fatal=False)
|
||||||
|
|
||||||
@mock.patch.object(utils, 'relation_get')
|
@mock.patch.object(utils, 'relation_get')
|
||||||
def test_parse_data_json(self, relation_get):
|
def test_parse_data_json(self, relation_get):
|
||||||
|
@ -1260,3 +1263,57 @@ class UtilsTestCase(unittest.TestCase):
|
||||||
commit.assert_called_once_with(
|
commit.assert_called_once_with(
|
||||||
'crm configure property stonith-enabled=false',
|
'crm configure property stonith-enabled=false',
|
||||||
failure_is_fatal=True)
|
failure_is_fatal=True)
|
||||||
|
|
||||||
|
@mock.patch('subprocess.check_output')
|
||||||
|
def test_node_is_dc(self, mock_subprocess):
|
||||||
|
with open(self._testdata('test_crm_mon.xml'), 'r') as fd:
|
||||||
|
mock_subprocess.return_value = "".join(
|
||||||
|
fd.readlines()).encode("utf-8")
|
||||||
|
|
||||||
|
self.assertTrue(utils.node_is_dc('juju-2eebcf-0'))
|
||||||
|
|
||||||
|
@mock.patch.object(utils.unitdata, 'HookData')
|
||||||
|
def test_is_update_ring_requested(self, HookData):
|
||||||
|
hook_data = self.MockHookData()
|
||||||
|
HookData.return_value = hook_data
|
||||||
|
self.assertTrue(
|
||||||
|
utils.is_update_ring_requested('random-uuid-generated')
|
||||||
|
)
|
||||||
|
self.assertEquals(
|
||||||
|
hook_data.kv.get('corosync-update-uuid'),
|
||||||
|
'random-uuid-generated',
|
||||||
|
)
|
||||||
|
# No change in uuid means no new request has been issued
|
||||||
|
self.assertFalse(
|
||||||
|
utils.is_update_ring_requested('random-uuid-generated')
|
||||||
|
)
|
||||||
|
|
||||||
|
@mock.patch('pcmk.commit')
|
||||||
|
@mock.patch.object(utils, 'emit_corosync_conf')
|
||||||
|
@mock.patch.object(utils, 'is_update_ring_requested')
|
||||||
|
@mock.patch.object(utils, 'relation_get')
|
||||||
|
def test_trigger_corosync_update_from_leader(self, mock_relation_get,
|
||||||
|
mock_is_update_ring_req,
|
||||||
|
mock_emit_corosync_conf,
|
||||||
|
mock_commit):
|
||||||
|
# corosync-update-uuid is set and has changed:
|
||||||
|
mock_relation_get.return_value = 'random-uuid-generated'
|
||||||
|
mock_is_update_ring_req.return_value = True
|
||||||
|
|
||||||
|
mock_emit_corosync_conf.return_value = True
|
||||||
|
self.assertTrue(
|
||||||
|
utils.trigger_corosync_update_from_leader(
|
||||||
|
'hacluster/0',
|
||||||
|
'hanode:0',
|
||||||
|
),
|
||||||
|
)
|
||||||
|
mock_commit.assert_has_calls([mock.call('corosync-cfgtool -R')])
|
||||||
|
|
||||||
|
# corosync-update-uuid isn't set:
|
||||||
|
mock_relation_get.return_value = None
|
||||||
|
self.assertFalse(
|
||||||
|
utils.trigger_corosync_update_from_leader(
|
||||||
|
'hacluster/0',
|
||||||
|
'hanode:0',
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
|
@ -163,7 +163,8 @@ class TestPcmk(unittest.TestCase):
|
||||||
# Pacemaker is up
|
# Pacemaker is up
|
||||||
gethostname.return_value = 'hanode-1'
|
gethostname.return_value = 'hanode-1'
|
||||||
getstatusoutput.return_value = (0, 'Hosname: hanode-1')
|
getstatusoutput.return_value = (0, 'Hosname: hanode-1')
|
||||||
self.assertTrue(pcmk.wait_for_pcmk(retries=2, sleep=0))
|
# Here we are asserting that it doesn't raise anything:
|
||||||
|
pcmk.wait_for_pcmk(retries=2, sleep=0)
|
||||||
|
|
||||||
@mock.patch('subprocess.check_output')
|
@mock.patch('subprocess.check_output')
|
||||||
def test_crm_version(self, mock_check_output):
|
def test_crm_version(self, mock_check_output):
|
||||||
|
|
|
@ -0,0 +1,81 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<crm_mon version="1.1.18">
|
||||||
|
<summary>
|
||||||
|
<stack type="corosync" />
|
||||||
|
<current_dc present="true" version="1.1.18-2b07d5c5a9" name="juju-2eebcf-0" id="1000" with_quorum="true" />
|
||||||
|
<last_update time="Mon Jul 20 09:15:49 2020" />
|
||||||
|
<last_change time="Mon Jul 20 09:09:40 2020" user="hacluster" client="crmd" origin="juju-2eebcf-2" />
|
||||||
|
<nodes_configured number="3" expected_votes="unknown" />
|
||||||
|
<resources_configured number="5" disabled="0" blocked="0" />
|
||||||
|
<cluster_options stonith-enabled="false" symmetric-cluster="true" no-quorum-policy="stop" maintenance-mode="false" />
|
||||||
|
</summary>
|
||||||
|
<nodes>
|
||||||
|
<node name="juju-2eebcf-0" id="1000" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="true" resources_running="2" type="member" />
|
||||||
|
<node name="juju-2eebcf-2" id="1001" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="false" resources_running="2" type="member" />
|
||||||
|
<node name="juju-2eebcf-3" id="1002" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="false" resources_running="1" type="member" />
|
||||||
|
</nodes>
|
||||||
|
<resources>
|
||||||
|
<group id="grp_ks_vips" number_resources="1" >
|
||||||
|
<resource id="res_ks_0dd3a53_vip" resource_agent="ocf::heartbeat:IPaddr2" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
|
||||||
|
<node name="juju-2eebcf-0" id="1000" cached="false"/>
|
||||||
|
</resource>
|
||||||
|
</group>
|
||||||
|
<clone id="cl_ks_haproxy" multi_state="false" unique="false" managed="true" failed="false" failure_ignored="false" >
|
||||||
|
<resource id="res_ks_haproxy" resource_agent="lsb:haproxy" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
|
||||||
|
<node name="juju-2eebcf-2" id="1001" cached="false"/>
|
||||||
|
</resource>
|
||||||
|
<resource id="res_ks_haproxy" resource_agent="lsb:haproxy" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
|
||||||
|
<node name="juju-2eebcf-0" id="1000" cached="false"/>
|
||||||
|
</resource>
|
||||||
|
<resource id="res_ks_haproxy" resource_agent="lsb:haproxy" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
|
||||||
|
<node name="juju-2eebcf-3" id="1002" cached="false"/>
|
||||||
|
</resource>
|
||||||
|
</clone>
|
||||||
|
<resource id="res_ks_bc84550_vip" resource_agent="ocf::heartbeat:IPaddr2" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1" >
|
||||||
|
<node name="juju-2eebcf-2" id="1001" cached="false"/>
|
||||||
|
</resource>
|
||||||
|
</resources>
|
||||||
|
<node_attributes>
|
||||||
|
<node name="juju-2eebcf-0">
|
||||||
|
</node>
|
||||||
|
<node name="juju-2eebcf-2">
|
||||||
|
</node>
|
||||||
|
<node name="juju-2eebcf-3">
|
||||||
|
</node>
|
||||||
|
</node_attributes>
|
||||||
|
<node_history>
|
||||||
|
<node name="juju-2eebcf-2">
|
||||||
|
<resource_history id="res_ks_bc84550_vip" orphan="false" migration-threshold="1000000">
|
||||||
|
<operation_history call="56" task="start" last-rc-change="Mon Jul 20 09:09:37 2020" last-run="Mon Jul 20 09:09:37 2020" exec-time="548ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
<operation_history call="57" task="monitor" interval="10000ms" last-rc-change="Mon Jul 20 09:09:38 2020" exec-time="534ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
</resource_history>
|
||||||
|
<resource_history id="res_ks_haproxy" orphan="false" migration-threshold="1000000">
|
||||||
|
<operation_history call="64" task="probe" last-rc-change="Mon Jul 20 09:09:40 2020" last-run="Mon Jul 20 09:09:40 2020" exec-time="554ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
<operation_history call="64" task="probe" last-rc-change="Mon Jul 20 09:09:40 2020" last-run="Mon Jul 20 09:09:40 2020" exec-time="554ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
<operation_history call="70" task="monitor" interval="5000ms" last-rc-change="Mon Jul 20 09:09:41 2020" exec-time="804ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
</resource_history>
|
||||||
|
</node>
|
||||||
|
<node name="juju-2eebcf-0">
|
||||||
|
<resource_history id="res_ks_haproxy" orphan="false" migration-threshold="1000000">
|
||||||
|
<operation_history call="80" task="start" last-rc-change="Mon Jul 20 09:09:41 2020" last-run="Mon Jul 20 09:09:41 2020" exec-time="824ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
<operation_history call="82" task="monitor" interval="5000ms" last-rc-change="Mon Jul 20 09:09:42 2020" exec-time="534ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
</resource_history>
|
||||||
|
<resource_history id="res_ks_0dd3a53_vip" orphan="false" migration-threshold="1000000">
|
||||||
|
<operation_history call="79" task="probe" last-rc-change="Mon Jul 20 09:09:41 2020" last-run="Mon Jul 20 09:09:41 2020" exec-time="753ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
<operation_history call="79" task="probe" last-rc-change="Mon Jul 20 09:09:41 2020" last-run="Mon Jul 20 09:09:41 2020" exec-time="753ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
<operation_history call="81" task="monitor" interval="10000ms" last-rc-change="Mon Jul 20 09:09:42 2020" exec-time="542ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
</resource_history>
|
||||||
|
</node>
|
||||||
|
<node name="juju-2eebcf-3">
|
||||||
|
<resource_history id="res_ks_haproxy" orphan="false" migration-threshold="1000000">
|
||||||
|
<operation_history call="62" task="probe" last-rc-change="Mon Jul 20 09:09:40 2020" last-run="Mon Jul 20 09:09:40 2020" exec-time="547ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
<operation_history call="62" task="probe" last-rc-change="Mon Jul 20 09:09:40 2020" last-run="Mon Jul 20 09:09:40 2020" exec-time="547ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
<operation_history call="68" task="monitor" interval="5000ms" last-rc-change="Mon Jul 20 09:09:41 2020" exec-time="751ms" queue-time="0ms" rc="0" rc_text="ok" />
|
||||||
|
</resource_history>
|
||||||
|
</node>
|
||||||
|
</node_history>
|
||||||
|
<tickets>
|
||||||
|
</tickets>
|
||||||
|
<bans>
|
||||||
|
</bans>
|
||||||
|
</crm_mon>
|
Loading…
Reference in New Issue