From dbbc95c9c37304c7119e303c40a06f8b3b22b92e Mon Sep 17 00:00:00 2001 From: tengqm Date: Tue, 14 Nov 2017 08:16:10 -0500 Subject: [PATCH] Add lock retry logic Change-Id: I6526a85c3378a574c62371c0aafa2cafc088613e --- senlin/engine/senlin_lock.py | 26 +++++++++++++------- senlin/tests/unit/engine/test_senlin_lock.py | 26 +++++++++++--------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/senlin/engine/senlin_lock.py b/senlin/engine/senlin_lock.py index 353df6440..cb7f0e190 100644 --- a/senlin/engine/senlin_lock.py +++ b/senlin/engine/senlin_lock.py @@ -10,9 +10,12 @@ # License for the specific language governing permissions and limitations # under the License. +import eventlet +import random +import time + from oslo_config import cfg from oslo_log import log as logging -import time from senlin.common.i18n import _ from senlin.common import utils @@ -51,9 +54,11 @@ def cluster_lock_acquire(context, cluster_id, action_id, engine=None, # Step 1: try lock the cluster - if the returned owner_id is the # action id, it was a success - owners = cl_obj.ClusterLock.acquire(cluster_id, action_id, scope) - if action_id in owners: - return True + for retries in range(3): + owners = cl_obj.ClusterLock.acquire(cluster_id, action_id, scope) + if action_id in owners: + return True + eventlet.sleep(random.randrange(1, 3)) # Step 2: Last resort is 'forced locking', only needed when retry failed if forced: @@ -74,9 +79,12 @@ def cluster_lock_acquire(context, cluster_id, action_id, engine=None, objects.Service.gc_by_engine(dead_engine) return action_id in owners - LOG.error('Cluster is already locked by action %(old)s, ' - 'action %(new)s failed grabbing the lock', - {'old': str(owners), 'new': action_id}) + lock_owners = [] + for o in owners: + lock_owners.append(o[:8]) + LOG.warning('Cluster is already locked by action %(old)s, ' + 'action %(new)s failed grabbing the lock', + {'old': str(lock_owners), 'new': action_id[:8]}) return False @@ -84,8 +92,8 @@ def cluster_lock_acquire(context, cluster_id, action_id, engine=None, def cluster_lock_release(cluster_id, action_id, scope): """Release the lock on the specified cluster. - :param cluster_id: ID of the node to be released. - :param action_id: ID of the action that attempts to release the node. + :param cluster_id: ID of the cluster to be released. + :param action_id: ID of the action that attempts to release the cluster. :param scope: The scope of the lock to be released. """ return cl_obj.ClusterLock.release(cluster_id, action_id, scope) diff --git a/senlin/tests/unit/engine/test_senlin_lock.py b/senlin/tests/unit/engine/test_senlin_lock.py index b1480405f..97a924700 100644 --- a/senlin/tests/unit/engine/test_senlin_lock.py +++ b/senlin/tests/unit/engine/test_senlin_lock.py @@ -56,8 +56,9 @@ class SenlinLockTest(base.SenlinTestCase): 'NEW_ENGINE') self.assertTrue(res) - mock_acquire.assert_called_once_with("CLUSTER_A", "ACTION_XYZ", - lockm.CLUSTER_SCOPE) + mock_acquire.assert_called_with("CLUSTER_A", "ACTION_XYZ", + lockm.CLUSTER_SCOPE) + self.assertEqual(3, mock_acquire.call_count) mock_steal.assert_called_once_with('CLUSTER_A', 'ACTION_XYZ') mock_gc.assert_called_once_with(mock.ANY) @@ -65,26 +66,28 @@ class SenlinLockTest(base.SenlinTestCase): @mock.patch.object(clo.ClusterLock, "acquire") def test_cluster_lock_acquire_failed(self, mock_acquire, mock_dead): mock_dead.return_value = False - mock_acquire.return_value = 'ACTION_ABC' + mock_acquire.return_value = ['ACTION_ABC'] res = lockm.cluster_lock_acquire(self.ctx, 'CLUSTER_A', 'ACTION_XYZ') self.assertFalse(res) - mock_acquire.assert_called_once_with('CLUSTER_A', 'ACTION_XYZ', - lockm.CLUSTER_SCOPE) + mock_acquire.assert_called_with('CLUSTER_A', 'ACTION_XYZ', + lockm.CLUSTER_SCOPE) + self.assertEqual(3, mock_acquire.call_count) @mock.patch.object(clo.ClusterLock, "acquire") @mock.patch.object(clo.ClusterLock, "steal") def test_cluster_lock_acquire_forced(self, mock_steal, mock_acquire): - mock_acquire.side_effect = ['ACTION_ABC'] + mock_acquire.return_value = ['ACTION_ABC'] mock_steal.return_value = ['ACTION_XY'] res = lockm.cluster_lock_acquire(self.ctx, 'CLUSTER_A', 'ACTION_XY', forced=True) self.assertTrue(res) - mock_acquire.assert_called_once_with('CLUSTER_A', 'ACTION_XY', - lockm.CLUSTER_SCOPE) + mock_acquire.assert_called_with('CLUSTER_A', 'ACTION_XY', + lockm.CLUSTER_SCOPE) + self.assertEqual(3, mock_acquire.call_count) mock_steal.assert_called_once_with('CLUSTER_A', 'ACTION_XY') @mock.patch.object(common_utils, 'is_engine_dead') @@ -93,15 +96,16 @@ class SenlinLockTest(base.SenlinTestCase): def test_cluster_lock_acquire_steal_failed(self, mock_steal, mock_acquire, mock_dead): mock_dead.return_value = False - mock_acquire.side_effect = ['ACTION_ABC'] + mock_acquire.return_value = ['ACTION_ABC'] mock_steal.return_value = [] res = lockm.cluster_lock_acquire(self.ctx, 'CLUSTER_A', 'ACTION_XY', forced=True) self.assertFalse(res) - mock_acquire.assert_called_once_with('CLUSTER_A', 'ACTION_XY', - lockm.CLUSTER_SCOPE) + mock_acquire.assert_called_with('CLUSTER_A', 'ACTION_XY', + lockm.CLUSTER_SCOPE) + self.assertEqual(3, mock_acquire.call_count) mock_steal.assert_called_once_with('CLUSTER_A', 'ACTION_XY') @mock.patch.object(clo.ClusterLock, "release")