From d7286833e1cbd7b1263f97ed38c8606fd6437904 Mon Sep 17 00:00:00 2001 From: wangyao Date: Thu, 9 Nov 2017 09:37:20 +0800 Subject: [PATCH] Stop polling if nova instances goto error According to the bug description, if the nova instances go into an error state, the cluster building poll keeps running until it times out. To solve this problem, a check of the instance task status was added to the cluster building poll. Also provide a test case to validate the method. Change-Id: Id772d5528c93fcd061aa9d2b2e56c89229ad655c Closes-Bug: #1516763 --- trove/taskmanager/models.py | 16 +++++++--- .../unittests/taskmanager/test_clusters.py | 31 ++++++++++++++++--- .../taskmanager/test_galera_clusters.py | 31 ++++++++++++++++--- .../taskmanager/test_vertica_clusters.py | 31 ++++++++++++++++--- 4 files changed, 93 insertions(+), 16 deletions(-) diff --git a/trove/taskmanager/models.py b/trove/taskmanager/models.py index 892c833e03..ddb8e79ddd 100755 --- a/trove/taskmanager/models.py +++ b/trove/taskmanager/models.py @@ -246,11 +246,16 @@ class ClusterTasks(Cluster): for instance_id in ids: status = InstanceServiceStatus.find_by( instance_id=instance_id).get_status() - if _is_fast_fail_status(status): + task_status = DBInstance.find_by( + id=instance_id).get_task_status() + if (_is_fast_fail_status(status) or + (task_status == InstanceTasks.BUILDING_ERROR_SERVER)): # if one has failed, no need to continue polling LOG.debug("Instance %(id)s has acquired a fast-fail " - "status %(status)s.", {'id': instance_id, - 'status': status}) + "status %(status)s and" + " task_status %(task_status)s.", + {'id': instance_id, 'status': status, + 'task_status': task_status}) return True if status != expected_status: # if one is not in the expected state, continue polling @@ -266,7 +271,10 @@ class ClusterTasks(Cluster): for instance_id in ids: status = InstanceServiceStatus.find_by( instance_id=instance_id).get_status() - if _is_fast_fail_status(status): + task_status = DBInstance.find_by( + id=instance_id).get_task_status() + 
if (_is_fast_fail_status(status) or + (task_status == InstanceTasks.BUILDING_ERROR_SERVER)): failed_instance_ids.append(instance_id) return failed_instance_ids diff --git a/trove/tests/unittests/taskmanager/test_clusters.py b/trove/tests/unittests/taskmanager/test_clusters.py index 642f894d66..e18d828580 100644 --- a/trove/tests/unittests/taskmanager/test_clusters.py +++ b/trove/tests/unittests/taskmanager/test_clusters.py @@ -95,21 +95,44 @@ class MongoDbClusterTasksTest(trove_testtools.TestCase): datastore_version=mock_dv1) @patch.object(ClusterTasks, 'update_statuses_on_failure') + @patch.object(DBInstance, 'find_by') @patch.object(InstanceServiceStatus, 'find_by') @patch('trove.taskmanager.models.LOG') - def test_all_instances_ready_bad_status(self, mock_logging, - mock_find, mock_update): + def test_all_instances_ready_with_server_error(self, + mock_logging, mock_find, + mock_db_find, mock_update): (mock_find.return_value. - get_status.return_value) = ServiceStatuses.FAILED + get_status.return_value) = ServiceStatuses.NEW + (mock_db_find.return_value. + get_task_status.return_value) = InstanceTasks.BUILDING_ERROR_SERVER ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"], self.cluster_id) mock_update.assert_called_with(self.cluster_id, None) self.assertFalse(ret_val) + @patch.object(ClusterTasks, 'update_statuses_on_failure') + @patch.object(DBInstance, 'find_by') @patch.object(InstanceServiceStatus, 'find_by') - def test_all_instances_ready(self, mock_find): + @patch('trove.taskmanager.models.LOG') + def test_all_instances_ready_bad_status(self, mock_logging, + mock_find, mock_db_find, + mock_update): + (mock_find.return_value. + get_status.return_value) = ServiceStatuses.FAILED + (mock_db_find.return_value. 
+ get_task_status.return_value) = InstanceTasks.NONE + ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"], + self.cluster_id) + mock_update.assert_called_with(self.cluster_id, None) + self.assertFalse(ret_val) + + @patch.object(DBInstance, 'find_by') + @patch.object(InstanceServiceStatus, 'find_by') + def test_all_instances_ready(self, mock_find, mock_db_find): (mock_find.return_value. get_status.return_value) = ServiceStatuses.INSTANCE_READY + (mock_db_find.return_value. + get_task_status.return_value) = InstanceTasks.NONE ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"], self.cluster_id) self.assertTrue(ret_val) diff --git a/trove/tests/unittests/taskmanager/test_galera_clusters.py b/trove/tests/unittests/taskmanager/test_galera_clusters.py index bfdbf52756..1b001453f9 100644 --- a/trove/tests/unittests/taskmanager/test_galera_clusters.py +++ b/trove/tests/unittests/taskmanager/test_galera_clusters.py @@ -91,21 +91,44 @@ class GaleraClusterTasksTest(trove_testtools.TestCase): } @patch.object(GaleraCommonClusterTasks, 'update_statuses_on_failure') + @patch.object(DBInstance, 'find_by') @patch.object(InstanceServiceStatus, 'find_by') @patch('trove.taskmanager.models.LOG') - def test_all_instances_ready_bad_status(self, mock_logging, - mock_find, mock_update): + def test_all_instances_ready_with_server_error(self, + mock_logging, mock_find, + mock_db_find, mock_update): (mock_find.return_value. - get_status.return_value) = ServiceStatuses.FAILED + get_status.return_value) = ServiceStatuses.NEW + (mock_db_find.return_value. 
+ get_task_status.return_value) = InstanceTasks.BUILDING_ERROR_SERVER ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"], self.cluster_id) mock_update.assert_called_with(self.cluster_id, None) self.assertFalse(ret_val) + @patch.object(GaleraCommonClusterTasks, 'update_statuses_on_failure') + @patch.object(DBInstance, 'find_by') @patch.object(InstanceServiceStatus, 'find_by') - def test_all_instances_ready(self, mock_find): + @patch('trove.taskmanager.models.LOG') + def test_all_instances_ready_bad_status(self, mock_logging, + mock_find, mock_db_find, + mock_update): + (mock_find.return_value. + get_status.return_value) = ServiceStatuses.FAILED + (mock_db_find.return_value. + get_task_status.return_value) = InstanceTasks.NONE + ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"], + self.cluster_id) + mock_update.assert_called_with(self.cluster_id, None) + self.assertFalse(ret_val) + + @patch.object(DBInstance, 'find_by') + @patch.object(InstanceServiceStatus, 'find_by') + def test_all_instances_ready(self, mock_find, mock_db_find): (mock_find.return_value. get_status.return_value) = ServiceStatuses.INSTANCE_READY + (mock_db_find.return_value. 
+ get_task_status.return_value) = InstanceTasks.NONE ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"], self.cluster_id) self.assertTrue(ret_val) diff --git a/trove/tests/unittests/taskmanager/test_vertica_clusters.py b/trove/tests/unittests/taskmanager/test_vertica_clusters.py index 5d9f5712b6..8d0a14e1eb 100644 --- a/trove/tests/unittests/taskmanager/test_vertica_clusters.py +++ b/trove/tests/unittests/taskmanager/test_vertica_clusters.py @@ -86,21 +86,44 @@ class VerticaClusterTasksTest(trove_testtools.TestCase): datastore_version=mock_dv1) @patch.object(ClusterTasks, 'update_statuses_on_failure') + @patch.object(DBInstance, 'find_by') @patch.object(InstanceServiceStatus, 'find_by') @patch('trove.taskmanager.models.LOG') - def test_all_instances_ready_bad_status(self, mock_logging, - mock_find, mock_update): + def test_all_instances_ready_with_server_error(self, + mock_logging, mock_find, + mock_db_find, mock_update): (mock_find.return_value. - get_status.return_value) = ServiceStatuses.FAILED + get_status.return_value) = ServiceStatuses.NEW + (mock_db_find.return_value. + get_task_status.return_value) = InstanceTasks.BUILDING_ERROR_SERVER ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"], self.cluster_id) mock_update.assert_called_with(self.cluster_id, None) self.assertFalse(ret_val) + @patch.object(ClusterTasks, 'update_statuses_on_failure') + @patch.object(DBInstance, 'find_by') @patch.object(InstanceServiceStatus, 'find_by') - def test_all_instances_ready(self, mock_find): + @patch('trove.taskmanager.models.LOG') + def test_all_instances_ready_bad_status(self, mock_logging, + mock_find, mock_db_find, + mock_update): + (mock_find.return_value. + get_status.return_value) = ServiceStatuses.FAILED + (mock_db_find.return_value. 
+ get_task_status.return_value) = InstanceTasks.NONE + ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"], + self.cluster_id) + mock_update.assert_called_with(self.cluster_id, None) + self.assertFalse(ret_val) + + @patch.object(DBInstance, 'find_by') + @patch.object(InstanceServiceStatus, 'find_by') + def test_all_instances_ready(self, mock_find, mock_db_find): (mock_find.return_value. get_status.return_value) = ServiceStatuses.INSTANCE_READY + (mock_db_find.return_value. + get_task_status.return_value) = InstanceTasks.NONE ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"], self.cluster_id) self.assertTrue(ret_val)