Stop polling if nova instances go to error

According to the bug description, if the nova instances go into an error
state, the cluster building poll keeps running until it times out. To solve
this problem, the instance task status is now also checked in the cluster
building poll detection.
Test cases are also provided to validate the method.

Change-Id: Id772d5528c93fcd061aa9d2b2e56c89229ad655c
Closes-Bug: #1516763
This commit is contained in:
wangyao 2017-11-09 09:37:20 +08:00
parent 1376fb1b6c
commit d7286833e1
4 changed files with 93 additions and 16 deletions

View File

@ -246,11 +246,16 @@ class ClusterTasks(Cluster):
for instance_id in ids:
status = InstanceServiceStatus.find_by(
instance_id=instance_id).get_status()
if _is_fast_fail_status(status):
task_status = DBInstance.find_by(
id=instance_id).get_task_status()
if (_is_fast_fail_status(status) or
(task_status == InstanceTasks.BUILDING_ERROR_SERVER)):
# if one has failed, no need to continue polling
LOG.debug("Instance %(id)s has acquired a fast-fail "
"status %(status)s.", {'id': instance_id,
'status': status})
"status %(status)s and"
" task_status %(task_status)s.",
{'id': instance_id, 'status': status,
'task_status': task_status})
return True
if status != expected_status:
# if one is not in the expected state, continue polling
@ -266,7 +271,10 @@ class ClusterTasks(Cluster):
for instance_id in ids:
status = InstanceServiceStatus.find_by(
instance_id=instance_id).get_status()
if _is_fast_fail_status(status):
task_status = DBInstance.find_by(
id=instance_id).get_task_status()
if (_is_fast_fail_status(status) or
(task_status == InstanceTasks.BUILDING_ERROR_SERVER)):
failed_instance_ids.append(instance_id)
return failed_instance_ids

View File

@ -95,21 +95,44 @@ class MongoDbClusterTasksTest(trove_testtools.TestCase):
datastore_version=mock_dv1)
@patch.object(ClusterTasks, 'update_statuses_on_failure')
@patch.object(DBInstance, 'find_by')
@patch.object(InstanceServiceStatus, 'find_by')
@patch('trove.taskmanager.models.LOG')
def test_all_instances_ready_bad_status(self, mock_logging,
mock_find, mock_update):
def test_all_instances_ready_with_server_error(self,
mock_logging, mock_find,
mock_db_find, mock_update):
(mock_find.return_value.
get_status.return_value) = ServiceStatuses.FAILED
get_status.return_value) = ServiceStatuses.NEW
(mock_db_find.return_value.
get_task_status.return_value) = InstanceTasks.BUILDING_ERROR_SERVER
ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"],
self.cluster_id)
mock_update.assert_called_with(self.cluster_id, None)
self.assertFalse(ret_val)
@patch.object(ClusterTasks, 'update_statuses_on_failure')
@patch.object(DBInstance, 'find_by')
@patch.object(InstanceServiceStatus, 'find_by')
def test_all_instances_ready(self, mock_find):
@patch('trove.taskmanager.models.LOG')
def test_all_instances_ready_bad_status(self, mock_logging,
mock_find, mock_db_find,
mock_update):
(mock_find.return_value.
get_status.return_value) = ServiceStatuses.FAILED
(mock_db_find.return_value.
get_task_status.return_value) = InstanceTasks.NONE
ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"],
self.cluster_id)
mock_update.assert_called_with(self.cluster_id, None)
self.assertFalse(ret_val)
@patch.object(DBInstance, 'find_by')
@patch.object(InstanceServiceStatus, 'find_by')
def test_all_instances_ready(self, mock_find, mock_db_find):
(mock_find.return_value.
get_status.return_value) = ServiceStatuses.INSTANCE_READY
(mock_db_find.return_value.
get_task_status.return_value) = InstanceTasks.NONE
ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"],
self.cluster_id)
self.assertTrue(ret_val)

View File

@ -91,21 +91,44 @@ class GaleraClusterTasksTest(trove_testtools.TestCase):
}
@patch.object(GaleraCommonClusterTasks, 'update_statuses_on_failure')
@patch.object(DBInstance, 'find_by')
@patch.object(InstanceServiceStatus, 'find_by')
@patch('trove.taskmanager.models.LOG')
def test_all_instances_ready_bad_status(self, mock_logging,
mock_find, mock_update):
def test_all_instances_ready_with_server_error(self,
mock_logging, mock_find,
mock_db_find, mock_update):
(mock_find.return_value.
get_status.return_value) = ServiceStatuses.FAILED
get_status.return_value) = ServiceStatuses.NEW
(mock_db_find.return_value.
get_task_status.return_value) = InstanceTasks.BUILDING_ERROR_SERVER
ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"],
self.cluster_id)
mock_update.assert_called_with(self.cluster_id, None)
self.assertFalse(ret_val)
@patch.object(GaleraCommonClusterTasks, 'update_statuses_on_failure')
@patch.object(DBInstance, 'find_by')
@patch.object(InstanceServiceStatus, 'find_by')
def test_all_instances_ready(self, mock_find):
@patch('trove.taskmanager.models.LOG')
def test_all_instances_ready_bad_status(self, mock_logging,
mock_find, mock_db_find,
mock_update):
(mock_find.return_value.
get_status.return_value) = ServiceStatuses.FAILED
(mock_db_find.return_value.
get_task_status.return_value) = InstanceTasks.NONE
ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"],
self.cluster_id)
mock_update.assert_called_with(self.cluster_id, None)
self.assertFalse(ret_val)
@patch.object(DBInstance, 'find_by')
@patch.object(InstanceServiceStatus, 'find_by')
def test_all_instances_ready(self, mock_find, mock_db_find):
(mock_find.return_value.
get_status.return_value) = ServiceStatuses.INSTANCE_READY
(mock_db_find.return_value.
get_task_status.return_value) = InstanceTasks.NONE
ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"],
self.cluster_id)
self.assertTrue(ret_val)

View File

@ -86,21 +86,44 @@ class VerticaClusterTasksTest(trove_testtools.TestCase):
datastore_version=mock_dv1)
@patch.object(ClusterTasks, 'update_statuses_on_failure')
@patch.object(DBInstance, 'find_by')
@patch.object(InstanceServiceStatus, 'find_by')
@patch('trove.taskmanager.models.LOG')
def test_all_instances_ready_bad_status(self, mock_logging,
mock_find, mock_update):
def test_all_instances_ready_with_server_error(self,
mock_logging, mock_find,
mock_db_find, mock_update):
(mock_find.return_value.
get_status.return_value) = ServiceStatuses.FAILED
get_status.return_value) = ServiceStatuses.NEW
(mock_db_find.return_value.
get_task_status.return_value) = InstanceTasks.BUILDING_ERROR_SERVER
ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"],
self.cluster_id)
mock_update.assert_called_with(self.cluster_id, None)
self.assertFalse(ret_val)
@patch.object(ClusterTasks, 'update_statuses_on_failure')
@patch.object(DBInstance, 'find_by')
@patch.object(InstanceServiceStatus, 'find_by')
def test_all_instances_ready(self, mock_find):
@patch('trove.taskmanager.models.LOG')
def test_all_instances_ready_bad_status(self, mock_logging,
mock_find, mock_db_find,
mock_update):
(mock_find.return_value.
get_status.return_value) = ServiceStatuses.FAILED
(mock_db_find.return_value.
get_task_status.return_value) = InstanceTasks.NONE
ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"],
self.cluster_id)
mock_update.assert_called_with(self.cluster_id, None)
self.assertFalse(ret_val)
@patch.object(DBInstance, 'find_by')
@patch.object(InstanceServiceStatus, 'find_by')
def test_all_instances_ready(self, mock_find, mock_db_find):
(mock_find.return_value.
get_status.return_value) = ServiceStatuses.INSTANCE_READY
(mock_db_find.return_value.
get_task_status.return_value) = InstanceTasks.NONE
ret_val = self.clustertasks._all_instances_ready(["1", "2", "3", "4"],
self.cluster_id)
self.assertTrue(ret_val)