Add action to force resync of images in all pools on local endpoint
There exist failure scenarios where abrupt shutdown and/or interruptions to communication may lead to a split-brain situation where the RBD Mirroring processes in both Ceph clusters claim to be the primary. In such a situation the operator must decide which cluster has the most recent data and should be elected primary by using the ``demote`` and ``promote`` (optionally with force parameter) actions. After making this decision the secondary cluster must be resynced to track the promoted master; this is done by running the ``resync-pools`` action on the non-master cluster. Change-Id: I4f57c9202ed4d055066286f808369ec0ddddb7ea
This commit is contained in:
parent
0770640158
commit
79bc4e1379
|
@ -6,7 +6,32 @@ Ceph 12.2 Luminous or later is required.
|
|||
|
||||
# Usage
|
||||
|
||||
TBC
|
||||
## Recovering from abrupt shutdown
|
||||
|
||||
There exist failure scenarios where abrupt shutdown and/or interruptions to
|
||||
communication may lead to a split-brain situation where the RBD Mirroring
|
||||
processes in both Ceph clusters claim to be the primary.
|
||||
|
||||
In such a situation the operator must decide which cluster has the most
|
||||
recent data and should be elected primary by using the ``demote`` and
|
||||
``promote`` (optionally with force parameter) actions.
|
||||
|
||||
After making this decision the secondary cluster must be resynced to track
|
||||
the promoted master; this is done by running the ``resync-pools`` action on
|
||||
the non-master cluster.
|
||||
|
||||
juju run-action -m site-b ceph-rbd-mirror/leader --wait demote
|
||||
juju run-action -m site-a ceph-rbd-mirror/leader --wait promote force=True
|
||||
|
||||
juju run-action -m site-a ceph-rbd-mirror/leader --wait status verbose=True
|
||||
juju run-action -m site-b ceph-rbd-mirror/leader --wait status verbose=True
|
||||
|
||||
juju run-action -m site-b ceph-rbd-mirror/leader --wait resync-pools i-really-mean-it=True
|
||||
|
||||
__NOTE__ When using Ceph Luminous, the mirror state information will not be
|
||||
accurate after recovering from an unclean shutdown. Regardless of the output of
|
||||
the status information you will be able to write to images after a forced
|
||||
promote.
|
||||
|
||||
# Bugs
|
||||
|
||||
|
|
|
@ -16,6 +16,18 @@ refresh-pools:
|
|||
Refresh list of pools from local and remote Ceph endpoint.
|
||||
As a side effect, mirroring will be configured for any manually created
|
||||
pools that the charm currently does not know about.
|
||||
resync-pools:
|
||||
description: |
|
||||
\
|
||||
USE WITH CAUTION - Force image resync for all images in pools on local
|
||||
Ceph endpoint.
|
||||
params:
|
||||
i-really-mean-it:
|
||||
type: boolean
|
||||
description: |
|
||||
This must be set to true to perform the action
|
||||
required:
|
||||
- i-really-mean-it
|
||||
status:
|
||||
description: |
|
||||
Get mirror pool status
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import collections
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
|
@ -95,10 +96,41 @@ def refresh_pools(args):
|
|||
return reactive.main()
|
||||
|
||||
|
||||
def resync_pools(args):
    """Force image resync on pools in local Ceph endpoint.

    The action is refused (and marked as failed) unless the
    ``i-really-mean-it`` action parameter is truthy.  Otherwise the images
    of every eligible pool on the ``ceph-local`` endpoint are listed,
    ``rbd mirror image resync`` is run for each image, and the command
    output is published as the ``output`` action result with one
    ``pool/image: message`` line per image.

    :param args: Unused by this action.
    :raises subprocess.CalledProcessError: If an ``rbd`` invocation exits
        non-zero; the error is not handled here and no result is set.
    :raises ValueError: If the image listing returned by ``rbd`` is not
        valid JSON.
    """
    if not ch_core.hookenv.action_get('i-really-mean-it'):
        ch_core.hookenv.action_fail('Required parameter not set')
        return
    with charms_openstack.charm.provide_charm_instance() as charm:
        ceph_local = reactive.endpoint_from_name('ceph-local')
        rbd_cmd = ['rbd', '--id', charm.ceph_id]
        # Collected in processing order so the report is deterministic.
        report = []
        for pool in charm.eligible_pools(ceph_local.pools):
            # list images in pool
            listing = subprocess.check_output(
                rbd_cmd + ['--format', 'json', '-p', pool, 'ls'],
                universal_newlines=True)
            for image in json.loads(listing):
                image_spec = '{}/{}'.format(pool, image)
                output = subprocess.check_output(
                    rbd_cmd + ['mirror', 'image', 'resync', image_spec],
                    universal_newlines=True)
                report.append('{}: {}'.format(image_spec, output.rstrip()))
        ch_core.hookenv.action_set({'output': '\n'.join(report)})
|
||||
|
||||
|
||||
# Dispatch table: action name (as declared in actions.yaml) -> handler.
ACTIONS = {
    'demote': rbd_mirror_action,
    'promote': rbd_mirror_action,
    'refresh-pools': refresh_pools,
    'resync-pools': resync_pools,
    'status': rbd_mirror_action,
}
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
actions.py
|
|
@ -13,6 +13,7 @@
|
|||
# limitations under the License.
|
||||
|
||||
import collections
|
||||
import json
|
||||
import mock
|
||||
import sys
|
||||
|
||||
|
@ -111,6 +112,31 @@ class TestCephRBDMirrorActions(test_utils.PatchHelper):
|
|||
self._KV.flush.assert_called_once_with()
|
||||
self.main.assert_called_once_with()
|
||||
|
||||
def test_resync_pools(self):
    """Check the ``resync-pools`` confirmation guard and resync report."""
    self.patch_object(actions.reactive, 'endpoint_from_name')
    self.patch_object(actions.ch_core.hookenv, 'action_get')
    self.patch_object(actions.ch_core.hookenv, 'action_fail')
    self.patch_object(actions.subprocess, 'check_output')
    self.patch_object(actions.ch_core.hookenv, 'action_set')
    endpoint = mock.MagicMock()
    endpoint.pools = collections.OrderedDict(
        {'apool': {'applications': {'rbd': {}}}})
    self.endpoint_from_name.return_value = endpoint
    self.crm_charm.eligible_pools.return_value = endpoint.pools
    self.crm_charm.ceph_id = 'acephid'

    # Without the confirmation parameter the action must fail and must
    # neither run any command nor set a result.
    self.action_get.return_value = False
    actions.resync_pools([])
    self.action_fail.assert_called_once_with('Required parameter not set')
    self.assertFalse(self.check_output.called)
    self.assertFalse(self.action_set.called)

    # With confirmation: first call lists the images in the pool, second
    # call flags the single image for resync.
    self.action_get.return_value = True
    self.check_output.side_effect = [
        json.dumps(['imagea']),
        'resync flagged for imagea\n',
    ]
    actions.resync_pools([])
    self.check_output.assert_has_calls([
        mock.call(['rbd', '--id', 'acephid', '--format', 'json',
                   '-p', 'apool', 'ls'], universal_newlines=True),
        mock.call(['rbd', '--id', 'acephid', 'mirror', 'image', 'resync',
                   'apool/imagea'], universal_newlines=True),
    ])
    # assertEqual: the assertEquals alias is deprecated and was removed
    # in Python 3.12.
    self.assertEqual(
        sorted(self.action_set.call_args[0][0]['output'].split('\n')),
        ['apool/imagea: resync flagged for imagea'])
|
||||
|
||||
def test_main(self):
|
||||
self.patch_object(actions, 'ACTIONS')
|
||||
self.patch_object(actions.ch_core.hookenv, 'action_fail')
|
||||
|
|
Loading…
Reference in New Issue