Add quiesce_wait_time option to share replica promote API.

Currently netapp_snapmirror_quiesce_timeout is an option of replica
promote that can only be set by the operator. There are scenarios where
a high timeout value set by the operator does not fit well, e.g. when
disaster strikes and the user wants to do an unplanned failover fast.
Added a new option 'quiesce_wait_time' to the share replica promote API,
which allows using the specified wait time (in seconds) instead of the
config option.

Closes-bug: #2000171
Change-Id: Ib02063ee8b82f7374cd89f90e7f24a845c6c7cd7
This commit is contained in:
Kiran Pawar 2022-12-23 12:41:37 +00:00
parent 1d4beed95b
commit 5f0f14a7f8
25 changed files with 197 additions and 48 deletions

View File

@ -3015,6 +3015,13 @@ share_replica_id:
in: body
required: true
type: string
share_replica_quiesce_wait_time:
description: |
The quiesce wait time in seconds used during replica promote.
in: body
required: false
type: integer
min_version: 2.75
share_replica_replica_state:
description: |
The replica state of a share replica. List of possible values:

View File

@ -1,3 +1,5 @@
{
"promote": null
"promote": {
"quiesce_wait_time": 30
}
}

View File

@ -154,6 +154,7 @@ Request
- project_id: project_id_path
- share_replica_id: share_replica_id_path
- quiesce_wait_time: share_replica_quiesce_wait_time
Request example
---------------

View File

@ -190,6 +190,8 @@ REST_API_VERSION_HISTORY = """
* 2.73 - Added Share Snapshot Metadata to Metadata API
* 2.74 - Allow/deny share access rule even if share replicas are in
'error' state.
* 2.75 - Added option to specify quiesce wait time in share replica
promote API.
"""
@ -197,7 +199,7 @@ REST_API_VERSION_HISTORY = """
# The default api version request is defined to be the
# minimum version of the API supported.
_MIN_API_VERSION = "2.0"
_MAX_API_VERSION = "2.74"
_MAX_API_VERSION = "2.75"
DEFAULT_API_VERSION = _MIN_API_VERSION

View File

@ -17,6 +17,7 @@
from http import client as http_client
from oslo_utils import strutils
import webob
from webob import exc
@ -241,14 +242,21 @@ class ShareReplicationController(wsgi.Controller, wsgi.AdminActionsMixin):
def promote(self, req, id, body):
return self._promote(req, id, body)
@wsgi.Controller.api_version(GRADUATION_VERSION) # noqa
@wsgi.Controller.api_version(GRADUATION_VERSION, "2.74") # noqa
@wsgi.response(202)
@wsgi.action('promote')
def promote(self, req, id, body): # pylint: disable=function-redefined # noqa F811
return self._promote(req, id, body)
@wsgi.Controller.api_version("2.75") # noqa
@wsgi.response(202)
@wsgi.action('promote')
def promote(self, req, id, body): # pylint: disable=function-redefined # noqa F811
return self._promote(req, id, body)
return self._promote(req, id, body, allow_quiesce_wait_time=True)
@wsgi.Controller.authorize('promote')
def _promote(self, req, id, body):
def _promote(self, req, id, body,
allow_quiesce_wait_time=False):
"""Promote a replica to active state."""
context = req.environ['manila.context']
@ -268,8 +276,21 @@ class ShareReplicationController(wsgi.Controller, wsgi.AdminActionsMixin):
if replica_state == constants.REPLICA_STATE_ACTIVE:
return webob.Response(status_int=http_client.OK)
quiesce_wait_time = None
if allow_quiesce_wait_time:
wait_time = body.get('promote', {}).get('quiesce_wait_time')
if wait_time:
if not strutils.is_int_like(wait_time) or int(wait_time) <= 0:
msg = _("quiesce_wait_time must be an integer and "
"greater than 0.")
raise exc.HTTPBadRequest(explanation=msg)
else:
quiesce_wait_time = int(wait_time)
try:
replica = self.share_api.promote_share_replica(context, replica)
replica = self.share_api.promote_share_replica(
context, replica,
quiesce_wait_time=quiesce_wait_time)
except exception.ReplicationException as e:
raise exc.HTTPBadRequest(explanation=e.message)
except exception.AdminRequired as e:

View File

@ -849,7 +849,8 @@ class API(base.Base):
share_replica,
force=force)
def promote_share_replica(self, context, share_replica):
def promote_share_replica(self, context, share_replica,
quiesce_wait_time=None):
if share_replica.get('status') != constants.STATUS_AVAILABLE:
msg = _("Replica %(replica_id)s must be in %(status)s state to be "
@ -872,7 +873,9 @@ class API(base.Base):
context, share_replica['id'],
{'status': constants.STATUS_REPLICATION_CHANGE})
self.share_rpcapi.promote_share_replica(context, share_replica)
self.share_rpcapi.promote_share_replica(
context, share_replica,
quiesce_wait_time=quiesce_wait_time)
return self.db.share_replica_get(context, share_replica['id'])

View File

@ -2068,7 +2068,7 @@ class ShareDriver(object):
raise NotImplementedError()
def promote_replica(self, context, replica_list, replica, access_rules,
share_server=None):
share_server=None, quiesce_wait_time=None):
"""Promote a replica to 'active' replica state.
.. note::
@ -2152,6 +2152,8 @@ class ShareDriver(object):
:param share_server: <models.ShareServer> or None
Share server of the replica to be promoted.
:param quiesce_wait_time: time in seconds or None
Share replica promote quiesce wait time.
:return: updated_replica_list or None.
The driver can return the updated list as in the request
parameter. Changes that will be updated to the Database are:

View File

@ -110,7 +110,7 @@ class HuaweiBase(metaclass=abc.ABCMeta):
@abc.abstractmethod
def promote_replica(self, context, replica_list, replica, access_rules,
share_server=None):
share_server=None, quiesce_wait_time=None):
"""Promote a replica to 'active' replica state."""
@abc.abstractmethod

View File

@ -254,7 +254,7 @@ class HuaweiNasDriver(driver.ShareDriver):
share_server)
def promote_replica(self, context, replica_list, replica, access_rules,
share_server=None):
share_server=None, quiesce_wait_time=None):
"""Promote a replica to 'active' replica state.."""
return self.plugin.promote_replica(context,
replica_list,

View File

@ -1769,7 +1769,7 @@ class V3StorageConnection(driver.HuaweiBase):
return self.replica_mgr.get_replica_state(replica_pair_id)
def promote_replica(self, context, replica_list, replica, access_rules,
share_server=None):
share_server=None, quiesce_wait_time=None):
replica_pair_id = self.private_storage.get(replica['share_id'],
'replica_pair_id')
if replica_pair_id is None:

View File

@ -317,7 +317,8 @@ class DataMotionSession(object):
dest_vserver,
clear_checkpoint=False)
def quiesce_then_abort(self, source_share_obj, dest_share_obj):
def quiesce_then_abort(self, source_share_obj, dest_share_obj,
quiesce_wait_time=None):
dest_volume, dest_vserver, dest_backend = (
self.get_backend_info_for_share(dest_share_obj))
dest_client = get_client_for_backend(dest_backend,
@ -333,7 +334,9 @@ class DataMotionSession(object):
dest_volume)
config = get_backend_configuration(dest_backend)
retries = config.netapp_snapmirror_quiesce_timeout / 5
timeout = (
quiesce_wait_time or config.netapp_snapmirror_quiesce_timeout)
retries = int(timeout / 5) or 1
@utils.retry(retry_param=exception.ReplicationException,
interval=5,
@ -358,7 +361,8 @@ class DataMotionSession(object):
dest_volume,
clear_checkpoint=False)
def break_snapmirror(self, source_share_obj, dest_share_obj, mount=True):
def break_snapmirror(self, source_share_obj, dest_share_obj, mount=True,
quiesce_wait_time=None):
"""Breaks SnapMirror relationship.
1. Quiesce any ongoing snapmirror transfers
@ -375,7 +379,8 @@ class DataMotionSession(object):
source_share_obj)
# 1. Attempt to quiesce, then abort
self.quiesce_then_abort(source_share_obj, dest_share_obj)
self.quiesce_then_abort(source_share_obj, dest_share_obj,
quiesce_wait_time=quiesce_wait_time)
# 2. Break SnapMirror
dest_client.break_snapmirror_vol(src_vserver,

View File

@ -154,10 +154,12 @@ class NetAppCmodeMultiSvmShareDriver(driver.ShareDriver):
replica_snapshots)
def promote_replica(self, context, replica_list, replica, access_rules,
share_server=None):
return self.library.promote_replica(context, replica_list, replica,
access_rules,
share_server=share_server)
share_server=None, quiesce_wait_time=None):
return self.library.promote_replica(
context, replica_list, replica,
access_rules,
share_server=share_server,
quiesce_wait_time=quiesce_wait_time)
def update_replica_state(self, context, replica_list, replica,
access_rules, replica_snapshots,

View File

@ -143,10 +143,12 @@ class NetAppCmodeSingleSvmShareDriver(driver.ShareDriver):
replica_snapshots, **kwargs)
def promote_replica(self, context, replica_list, replica, access_rules,
share_server=None):
return self.library.promote_replica(context, replica_list, replica,
access_rules,
share_server=share_server)
share_server=None, quiesce_wait_time=None):
return self.library.promote_replica(
context, replica_list, replica,
access_rules,
share_server=share_server,
quiesce_wait_time=quiesce_wait_time)
def update_replica_state(self, context, replica_list, replica,
access_rules, replica_snapshots,

View File

@ -2683,7 +2683,7 @@ class NetAppCmodeFileStorageLibrary(object):
return constants.REPLICA_STATE_IN_SYNC
def promote_replica(self, context, replica_list, replica, access_rules,
share_server=None):
share_server=None, quiesce_wait_time=None):
"""Switch SnapMirror relationships and allow r/w ops on replica.
Creates a DataMotion session and switches the direction of the
@ -2701,6 +2701,7 @@ class NetAppCmodeFileStorageLibrary(object):
:param replica: Replica to promote to SnapMirror source
:param access_rules: Access rules to apply to the replica
:param share_server: ShareServer class instance of replica
:param quiesce_wait_time: Wait time in seconds for snapmirror quiesce
:return: Updated replica_list
"""
orig_active_replica = self.find_active_replica(replica_list)
@ -2714,7 +2715,8 @@ class NetAppCmodeFileStorageLibrary(object):
new_active_replica = (
self._convert_destination_replica_to_independent(
context, dm_session, orig_active_replica, replica,
access_rules, share_server=share_server))
access_rules, share_server=share_server,
quiesce_wait_time=quiesce_wait_time))
except exception.StorageCommunicationException:
LOG.exception("Could not communicate with the backend "
"for replica %s during promotion.",
@ -2842,7 +2844,7 @@ class NetAppCmodeFileStorageLibrary(object):
def _convert_destination_replica_to_independent(
self, context, dm_session, orig_active_replica, replica,
access_rules, share_server=None):
access_rules, share_server=None, quiesce_wait_time=None):
"""Breaks SnapMirror and allows r/w ops on the destination replica.
For promotion, the existing SnapMirror relationship must be broken
@ -2855,6 +2857,7 @@ class NetAppCmodeFileStorageLibrary(object):
:param replica: Replica to promote to SnapMirror source
:param access_rules: Access rules to apply to the replica
:param share_server: ShareServer class instance of replica
:param quiesce_wait_time: Wait time in seconds for snapmirror quiesce
:return: Updated replica
"""
vserver, vserver_client = self._get_vserver(share_server=share_server)
@ -2869,7 +2872,8 @@ class NetAppCmodeFileStorageLibrary(object):
# unreachable
pass
# 2. Break SnapMirror
dm_session.break_snapmirror(orig_active_replica, replica)
dm_session.break_snapmirror(orig_active_replica, replica,
quiesce_wait_time=quiesce_wait_time)
# 3. Setup access rules
new_active_replica = replica.copy()

View File

@ -1127,7 +1127,7 @@ class ZFSonLinuxShareDriver(zfs_utils.ExecuteMixin, driver.ShareDriver):
@ensure_share_server_not_provided
def promote_replica(self, context, replica_list, replica, access_rules,
share_server=None):
share_server=None, quiesce_wait_time=None):
"""Promotes secondary replica to active and active to secondary."""
active_replica = self._get_active_replica(replica_list)
src_dataset_name = self.private_storage.get(

View File

@ -243,7 +243,7 @@ def add_hooks(f):
class ShareManager(manager.SchedulerDependentManager):
"""Manages NAS storages."""
RPC_API_VERSION = '1.23'
RPC_API_VERSION = '1.24'
def __init__(self, share_driver=None, service_name=None, *args, **kwargs):
"""Load the driver from args, or from flags."""
@ -2494,7 +2494,8 @@ class ShareManager(manager.SchedulerDependentManager):
@add_hooks
@utils.require_driver_initialized
@locked_share_replica_operation
def promote_share_replica(self, context, share_replica_id, share_id=None):
def promote_share_replica(self, context, share_replica_id, share_id=None,
quiesce_wait_time=None):
"""Promote a share replica to active state."""
context = context.elevated()
share_replica = self.db.share_replica_get(
@ -2549,7 +2550,8 @@ class ShareManager(manager.SchedulerDependentManager):
updated_replica_list = (
self.driver.promote_replica(
context, replica_list, share_replica, access_rules,
share_server=share_server)
share_server=share_server,
quiesce_wait_time=quiesce_wait_time)
)
except Exception as excep:
with excutils.save_and_reraise_exception():

View File

@ -83,6 +83,7 @@ class ShareAPI(object):
check_update_share_network_security_service()
1.23 - Add update_share_server_network_allocations() and
check_update_share_server_network_allocations()
1.24 - Add quiesce_wait_time parameter to promote_share_replica()
"""
BASE_RPC_API_VERSION = '1.0'
@ -91,7 +92,7 @@ class ShareAPI(object):
super(ShareAPI, self).__init__()
target = messaging.Target(topic=CONF.share_topic,
version=self.BASE_RPC_API_VERSION)
self.client = rpc.get_client(target, version_cap='1.23')
self.client = rpc.get_client(target, version_cap='1.24')
def create_share_instance(self, context, share_instance, host,
request_spec, filter_properties,
@ -374,13 +375,15 @@ class ShareAPI(object):
share_id=share_replica['share_id'],
force=force)
def promote_share_replica(self, context, share_replica):
def promote_share_replica(self, context, share_replica,
quiesce_wait_time=None):
host = utils.extract_host(share_replica['host'])
call_context = self.client.prepare(server=host, version='1.8')
call_context = self.client.prepare(server=host, version='1.24')
call_context.cast(context,
'promote_share_replica',
share_replica_id=share_replica['id'],
share_id=share_replica['share_id'])
share_id=share_replica['share_id'],
quiesce_wait_time=quiesce_wait_time)
def update_share_replica(self, context, share_replica):
host = utils.extract_host(share_replica['host'])

View File

@ -19,6 +19,7 @@ import copy
import ddt
from oslo_config import cfg
from oslo_serialization import jsonutils
from oslo_utils import strutils
from webob import exc
from manila.api import common
@ -38,6 +39,7 @@ CONF = cfg.CONF
CAST_RULES_READONLY_VERSION = '2.30'
PRE_GRADUATION_VERSION = '2.55'
GRADUATION_VERSION = '2.56'
PROMOTE_QUIESCE_WAIT_VERSION = '2.75'
@ddt.ddt
@ -679,6 +681,42 @@ class ShareReplicasApiTest(test.TestCase):
self.mock_policy_check.assert_called_once_with(
context, self.resource_name, 'promote')
@ddt.data(('2.74', None),
(PROMOTE_QUIESCE_WAIT_VERSION, None),
(PROMOTE_QUIESCE_WAIT_VERSION, 10),
(PROMOTE_QUIESCE_WAIT_VERSION, 'foobar'),
)
@ddt.unpack
def test_promote_quiesce_wait_time(self, microversion, time):
body = {'promote': {'quiesce_wait_time': time}}
replica, expected_replica = self._get_fake_replica(
replica_state=constants.REPLICA_STATE_IN_SYNC,
microversion=microversion)
self.mock_object(share_replicas.db, 'share_replica_get',
mock.Mock(return_value=replica))
self.mock_object(share_replicas.db, 'share_network_get',
mock.Mock(return_value=self.fake_share_network))
req = self._get_request(microversion=microversion)
allow_quiesce_wait_time = False
if (api_version.APIVersionRequest(microversion) >=
api_version.APIVersionRequest(PROMOTE_QUIESCE_WAIT_VERSION)):
allow_quiesce_wait_time = True
if time and allow_quiesce_wait_time:
if strutils.is_int_like(time):
mock_api_promote_replica_call = self.mock_object(
share.API, 'promote_share_replica',
mock.Mock(return_value=replica))
resp = self.controller.promote(req, replica['id'], body)
self.assertEqual(expected_replica, resp['share_replica'])
self.assertTrue(mock_api_promote_replica_call.called)
else:
self.assertRaises(exc.HTTPBadRequest,
self.controller.promote,
req,
replica['id'],
body)
@ddt.data('index', 'detail', '_show', '_create', '_delete_share_replica',
'_promote', 'reset_replica_state', 'reset_status', '_resync')
def test_policy_not_authorized(self, method_name):

View File

@ -584,7 +584,7 @@ class DummyDriver(driver.ShareDriver):
@slow_me_down
def promote_replica(self, context, replica_list, replica, access_rules,
share_server=None):
share_server=None, quiesce_wait_time=None):
"""Promote a replica to 'active' replica state."""
return_replica_list = []
for r in replica_list:

View File

@ -507,7 +507,8 @@ class NetAppCDOTDataMotionSessionTestCase(test.TestCase):
self.dest_vserver, self.fake_dest_vol_name)
self.dm_session.quiesce_then_abort.assert_called_once_with(
self.fake_src_share, self.fake_dest_share)
self.fake_src_share, self.fake_dest_share,
quiesce_wait_time=None)
self.mock_dest_client.mount_volume.assert_called_once_with(
self.fake_dest_vol_name)
@ -524,7 +525,8 @@ class NetAppCDOTDataMotionSessionTestCase(test.TestCase):
self.dest_vserver, self.fake_dest_vol_name)
self.dm_session.quiesce_then_abort.assert_called_once_with(
self.fake_src_share, self.fake_dest_share)
self.fake_src_share, self.fake_dest_share,
quiesce_wait_time=None)
self.assertFalse(self.mock_dest_client.mount_volume.called)
@ -535,7 +537,8 @@ class NetAppCDOTDataMotionSessionTestCase(test.TestCase):
self.fake_dest_share)
self.dm_session.quiesce_then_abort.assert_called_once_with(
self.fake_src_share, self.fake_dest_share)
self.fake_src_share, self.fake_dest_share,
quiesce_wait_time=None)
self.mock_dest_client.break_snapmirror_vol.assert_called_once_with(
self.source_vserver, self.fake_src_vol_name,
@ -544,6 +547,39 @@ class NetAppCDOTDataMotionSessionTestCase(test.TestCase):
self.mock_dest_client.mount_volume.assert_called_once_with(
self.fake_dest_vol_name)
@ddt.data(None, 2, 30)
def test_quiesce_then_abort_wait_time(self, wait_time):
self.mock_object(time, 'sleep')
mock_get_snapmirrors = mock.Mock(
return_value=[{'relationship-status': "transferring"}])
self.mock_object(self.mock_dest_client, 'get_snapmirrors',
mock_get_snapmirrors)
mock_backend_config = na_fakes.create_configuration()
mock_backend_config.netapp_snapmirror_quiesce_timeout = 10
self.mock_object(data_motion, 'get_backend_configuration',
mock.Mock(return_value=mock_backend_config))
self.dm_session.quiesce_then_abort(self.fake_src_share,
self.fake_dest_share,
quiesce_wait_time=wait_time)
self.mock_dest_client.get_snapmirrors.assert_called_with(
source_vserver=self.source_vserver,
dest_vserver=self.dest_vserver,
source_volume=self.fake_src_vol_name,
dest_volume=self.fake_dest_vol_name,
desired_attributes=['relationship-status', 'mirror-state']
)
call_count = self.mock_dest_client.get_snapmirrors.call_count
if wait_time:
if wait_time > 5:
self.assertEqual(wait_time / 5, call_count)
else:
self.assertEqual(1, call_count)
else:
self.assertEqual(2, call_count)
def test_quiesce_then_abort_timeout(self):
self.mock_object(time, 'sleep')
mock_get_snapmirrors = mock.Mock(

View File

@ -4786,7 +4786,8 @@ class NetAppFileStorageLibraryTestCase(test.TestCase):
self.mock_dm_session.update_snapmirror.assert_called_once_with(
self.fake_replica, self.fake_replica_2)
self.mock_dm_session.break_snapmirror.assert_called_once_with(
self.fake_replica, self.fake_replica_2)
self.fake_replica, self.fake_replica_2,
quiesce_wait_time=None)
self.assertEqual('fake_export_location',
replica['export_locations'])
@ -4814,7 +4815,8 @@ class NetAppFileStorageLibraryTestCase(test.TestCase):
self.mock_dm_session.update_snapmirror.assert_called_once_with(
self.fake_replica, self.fake_replica_2)
self.mock_dm_session.break_snapmirror.assert_called_once_with(
self.fake_replica, self.fake_replica_2)
self.fake_replica, self.fake_replica_2,
quiesce_wait_time=None)
self.assertEqual('fake_export_location',
replica['export_locations'])
@ -4897,7 +4899,8 @@ class NetAppFileStorageLibraryTestCase(test.TestCase):
self.mock_dm_session.update_snapmirror.assert_called_once_with(
self.fake_replica, self.fake_replica_2)
self.mock_dm_session.break_snapmirror.assert_called_once_with(
self.fake_replica, self.fake_replica_2)
self.fake_replica, self.fake_replica_2,
quiesce_wait_time=None)
self.assertEqual('fake_export_location',
replica['export_locations'])
@ -4932,7 +4935,8 @@ class NetAppFileStorageLibraryTestCase(test.TestCase):
self.mock_dm_session.update_snapmirror.assert_called_once_with(
self.fake_replica, self.fake_replica_2)
self.mock_dm_session.break_snapmirror.assert_called_once_with(
self.fake_replica, self.fake_replica_2)
self.fake_replica, self.fake_replica_2,
quiesce_wait_time=None)
fake_helper.assert_has_calls([
mock.call.set_client(mock.ANY),

View File

@ -4339,7 +4339,7 @@ class ShareAPITestCase(test.TestCase):
self.context, replica['id'],
{'status': constants.STATUS_REPLICATION_CHANGE})
mock_rpcapi_promote_share_replica_call.assert_called_once_with(
self.context, replica)
self.context, replica, quiesce_wait_time=None)
def test_promote_share_replica(self):
replica = fakes.fake_replica('FAKE_ID', host='HOSTA@BackendB#PoolC')
@ -4352,7 +4352,7 @@ class ShareAPITestCase(test.TestCase):
result = self.api.promote_share_replica(self.context, replica)
mock_sched_rpcapi_call.assert_called_once_with(
self.context, replica)
self.context, replica, quiesce_wait_time=None)
self.assertEqual(replica, result)
def test_update_share_replica_no_host(self):

View File

@ -330,8 +330,9 @@ class ShareRpcAPITestCase(test.TestCase):
def test_promote_share_replica(self):
self._test_share_api('promote_share_replica',
rpc_method='cast',
version='1.8',
share_replica=self.fake_share_replica)
version='1.24',
share_replica=self.fake_share_replica,
quiesce_wait_time=None)
def test_update_share_replica(self):
self._test_share_api('update_share_replica',

View File

@ -0,0 +1,7 @@
---
features:
- |
Starting with microversion 2.75, users can specify the quiesce wait time
in seconds in the share replica promote API. Be aware that not all drivers
support this parameter; when it is not supported, the value is silently
ignored.

View File

@ -0,0 +1,7 @@
---
fixes:
- |
With the NetApp ONTAP driver, users can now set the quiesce timeout
during promote, and this timeout takes precedence over the NetApp
configuration option `netapp_snapmirror_quiesce_timeout`. For more details,
please refer to
`launchpad bug 2000171 <https://bugs.launchpad.net/manila/+bug/2000171>`_