Add action to force resync of images in all pools on local endpoint
There exist failure scenarios where abrupt shutdown and/or interruptions to communication may lead to a split-brain situation where the RBD Mirroring process in both Ceph clusters claims to be the primary. In such a situation the operator must decide which cluster has the most recent data and should be elected primary by using the ``demote`` and ``promote`` (optionally with force parameter) actions. After making this decision the secondary cluster must be resynced to track the promoted master; this is done by running the ``resync-pools`` action on the non-master cluster. Change-Id: I4f57c9202ed4d055066286f808369ec0ddddb7ea
This commit is contained in:
parent
0770640158
commit
79bc4e1379
|
@ -6,7 +6,32 @@ Ceph 12.2 Luminous or later is required.
|
||||||
|
|
||||||
# Usage
|
# Usage
|
||||||
|
|
||||||
TBC
|
## Recovering from abrupt shutdown
|
||||||
|
|
||||||
|
There exist failure scenarios where abrupt shutdown and/or interruptions to
|
||||||
|
communication may lead to a split-brain situation where the RBD Mirroring
|
||||||
|
process in both Ceph clusters claims to be the primary.
|
||||||
|
|
||||||
|
In such a situation the operator must decide which cluster has the most
|
||||||
|
recent data and should be elected primary by using the ``demote`` and
|
||||||
|
``promote`` (optionally with force parameter) actions.
|
||||||
|
|
||||||
|
After making this decision the secondary cluster must be resynced to track
|
||||||
|
the promoted master; this is done by running the ``resync-pools`` action on
|
||||||
|
the non-master cluster.
|
||||||
|
|
||||||
|
juju run-action -m site-b ceph-rbd-mirror/leader --wait demote
|
||||||
|
juju run-action -m site-a ceph-rbd-mirror/leader --wait promote force=True
|
||||||
|
|
||||||
|
juju run-action -m site-a ceph-rbd-mirror/leader --wait status verbose=True
|
||||||
|
juju run-action -m site-b ceph-rbd-mirror/leader --wait status verbose=True
|
||||||
|
|
||||||
|
juju run-action -m site-b ceph-rbd-mirror/leader --wait resync-pools i-really-mean-it=True
|
||||||
|
|
||||||
|
__NOTE__ When using Ceph Luminous, the mirror state information will not be
|
||||||
|
accurate after recovering from an unclean shutdown. Regardless of the output of
|
||||||
|
the status information you will be able to write to images after a forced
|
||||||
|
promote.
|
||||||
|
|
||||||
# Bugs
|
# Bugs
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,18 @@ refresh-pools:
|
||||||
Refresh list of pools from local and remote Ceph endpoint.
|
Refresh list of pools from local and remote Ceph endpoint.
|
||||||
As a side effect, mirroring will be configured for any manually created
|
As a side effect, mirroring will be configured for any manually created
|
||||||
pools that the charm currently does not know about.
|
pools that the charm currently does not know about.
|
||||||
|
resync-pools:
|
||||||
|
description: |
|
||||||
|
\
|
||||||
|
USE WITH CAUTION - Force image resync for all images in pools on local
|
||||||
|
Ceph endpoint.
|
||||||
|
params:
|
||||||
|
i-really-mean-it:
|
||||||
|
type: boolean
|
||||||
|
description: |
|
||||||
|
This must be set to true to perform the action
|
||||||
|
required:
|
||||||
|
- i-really-mean-it
|
||||||
status:
|
status:
|
||||||
description: |
|
description: |
|
||||||
Get mirror pool status
|
Get mirror pool status
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import collections
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
@ -95,10 +96,41 @@ def refresh_pools(args):
|
||||||
return reactive.main()
|
return reactive.main()
|
||||||
|
|
||||||
|
|
||||||
|
def resync_pools(args):
    """Request a forced resync of every image in the eligible local pools.

    The action refuses to run unless the ``i-really-mean-it`` action
    parameter is truthy; in that case the action is marked as failed and
    nothing else happens.

    Otherwise the pools reported by the ``ceph-local`` endpoint are filtered
    through the charm's ``eligible_pools``, the images of each pool are
    listed with ``rbd ... ls`` and ``rbd mirror image resync`` is invoked for
    each of them.  The command output for every image is collected and
    published as the ``output`` action result, one ``pool/image: message``
    line per image.

    :param args: Command line arguments (not used by this action)
    :type args: List[str]
    """
    confirmed = ch_core.hookenv.action_get('i-really-mean-it')
    if not confirmed:
        ch_core.hookenv.action_fail('Required parameter not set')
        return
    with charms_openstack.charm.provide_charm_instance() as charm:
        local_endpoint = reactive.endpoint_from_name('ceph-local')
        # pool name -> image name -> stripped output of the resync command
        resynced = collections.defaultdict(dict)
        for pool_name in charm.eligible_pools(local_endpoint.pools):
            # ask rbd for the pool's images as a JSON list
            listing = subprocess.check_output(
                ['rbd', '--id', charm.ceph_id, '--format', 'json',
                 '-p', pool_name, 'ls'],
                universal_newlines=True)
            for image_name in json.loads(listing):
                response = subprocess.check_output(
                    ['rbd', '--id', charm.ceph_id, 'mirror', 'image',
                     'resync', '{}/{}'.format(pool_name, image_name)],
                    universal_newlines=True)
                resynced[pool_name][image_name] = response.rstrip()
        # one line per image, newline separated, no trailing newline
        report = '\n'.join(
            '{}/{}: {}'.format(pool_name, image_name, message)
            for pool_name, images in resynced.items()
            for image_name, message in images.items())
        ch_core.hookenv.action_set({'output': report})
|
||||||
|
|
||||||
|
|
||||||
ACTIONS = {
|
ACTIONS = {
|
||||||
'demote': rbd_mirror_action,
|
'demote': rbd_mirror_action,
|
||||||
'promote': rbd_mirror_action,
|
'promote': rbd_mirror_action,
|
||||||
'refresh-pools': refresh_pools,
|
'refresh-pools': refresh_pools,
|
||||||
|
'resync-pools': resync_pools,
|
||||||
'status': rbd_mirror_action,
|
'status': rbd_mirror_action,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
actions.py
|
|
@ -13,6 +13,7 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
|
import json
|
||||||
import mock
|
import mock
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
@ -111,6 +112,31 @@ class TestCephRBDMirrorActions(test_utils.PatchHelper):
|
||||||
self._KV.flush.assert_called_once_with()
|
self._KV.flush.assert_called_once_with()
|
||||||
self.main.assert_called_once_with()
|
self.main.assert_called_once_with()
|
||||||
|
|
||||||
|
def test_resync_pools(self):
    """Exercise ``resync_pools`` with and without the confirmation flag.

    First pass: ``i-really-mean-it`` is False, so no ``rbd`` command may be
    run and no action result may be set.

    Second pass: the flag is True; the mocked ``rbd`` calls return one image
    and a resync message, and the ``output`` action result must contain
    exactly one ``pool/image: message`` line.
    """
    self.patch_object(actions.reactive, 'endpoint_from_name')
    self.patch_object(actions.ch_core.hookenv, 'action_get')
    self.patch_object(actions.subprocess, 'check_output')
    self.patch_object(actions.ch_core.hookenv, 'action_set')
    endpoint = mock.MagicMock()
    endpoint.pools = collections.OrderedDict(
        {'apool': {'applications': {'rbd': {}}}})
    self.endpoint_from_name.return_value = endpoint
    self.crm_charm.eligible_pools.return_value = endpoint.pools
    self.crm_charm.ceph_id = 'acephid'
    # Without confirmation the action must not touch the cluster.
    self.action_get.return_value = False
    actions.resync_pools([])
    self.assertFalse(self.check_output.called)
    self.assertFalse(self.action_set.called)
    # With confirmation: first call lists the images, second one resyncs.
    self.action_get.return_value = True
    self.check_output.side_effect = [
        json.dumps(['imagea']),
        'resync flagged for imagea\n',
    ]
    actions.resync_pools([])
    # assertEqual, not the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(
        sorted(self.action_set.call_args[0][0]['output'].split('\n')),
        ['apool/imagea: resync flagged for imagea'])
|
||||||
|
|
||||||
def test_main(self):
|
def test_main(self):
|
||||||
self.patch_object(actions, 'ACTIONS')
|
self.patch_object(actions, 'ACTIONS')
|
||||||
self.patch_object(actions.ch_core.hookenv, 'action_fail')
|
self.patch_object(actions.ch_core.hookenv, 'action_fail')
|
||||||
|
|
Loading…
Reference in New Issue