Fix instance from alternating status on create

Instances currently alternate between
BUILD->ACTIVE->BUILD->ACTIVE on create. This happens
because of a race condition between the datastore
heartbeat, which reports the service as ACTIVE before
prepare has finished, and the prepare call, which manually
stops the service and brings the status back to BUILD.

The fix is to add an optional parameter, force, to set_status.
It is set to True by end_install_or_restart, which is called at
the end of prepare for all datastores, either directly or through
app.complete_install_or_restart(). When set_status is called it
now checks whether the current service status is NEW or BUILDING
and whether the force flag is False. If both are true, the prepare
call has not yet finished, so the heartbeat is skipped and does
not update the status. Once prepare is finished it calls
complete_install_or_restart, which in turn calls
end_install_or_restart and forces the status to be updated
from BUILD to ACTIVE. When the status is being set to FAILED or
BUILD_PENDING, the update is never skipped.

For MongoDB, complete_install_or_restart was added to service.py.
It is called at the end of prepare for a single-instance MongoDB
setup instead of calling set_status directly with RUNNING.

Cleaned up BaseDbStatusTest and added new tests covering the new
logic.

Change-Id: I7cbd5667e27608edef9755280a8f072495839e1d
Closes-Bug: 1482795
This commit is contained in:
Edmond Kotowski 2015-08-20 19:07:04 -07:00
parent 368fd674ce
commit 2b963fa4b1
4 changed files with 105 additions and 49 deletions

View File

@ -114,8 +114,7 @@ class Manager(periodic_task.PeriodicTasks):
self.app.status.set_status(
ds_instance.ServiceStatuses.BUILD_PENDING)
else:
self.app.status.set_status(
ds_instance.ServiceStatuses.RUNNING)
self.app.complete_install_or_restart()
LOG.info(_('Completed setup of MongoDB database instance.'))

View File

@ -181,6 +181,9 @@ class MongoDBApp(object):
raise RuntimeError("Could not start MongoDB.")
LOG.debug('MongoDB started successfully.')
def complete_install_or_restart(self):
self.status.end_install_or_restart()
def update_overrides(self, context, overrides, remove=False):
if overrides:
self.configuration_manager.apply_user_override(overrides)

View File

@ -77,7 +77,7 @@ class BaseDbStatus(object):
self.restart_mode = False
real_status = self._get_actual_db_status()
LOG.info(_("Updating database status to %s.") % real_status)
self.set_status(real_status)
self.set_status(real_status, force=True)
def _get_actual_db_status(self):
raise NotImplementedError()
@ -103,8 +103,18 @@ class BaseDbStatus(object):
return (self.status is not None and
self.status == instance.ServiceStatuses.RUNNING)
def set_status(self, status):
def set_status(self, status, force=False):
"""Use conductor to update the DB app status."""
force_heartbeat_status = (
status == instance.ServiceStatuses.FAILED or
status == instance.ServiceStatuses.BUILD_PENDING)
if (not force_heartbeat_status and not force and
(self.status == instance.ServiceStatuses.NEW or
self.status == instance.ServiceStatuses.BUILDING)):
LOG.debug("Prepare has not run yet, skipping heartbeat.")
return
LOG.debug("Casting set_status message to conductor (status is '%s')." %
status.description)
context = trove_context.TroveContext()

View File

@ -33,6 +33,7 @@ from testtools.matchers import Is
from testtools.matchers import Not
from trove.common import cfg
from trove.common import context as trove_context
from trove.common.exception import BadRequest
from trove.common.exception import GuestError
from trove.common.exception import PollTimeOut
@ -71,6 +72,7 @@ from trove.guestagent.datastore.mysql.service import MySqlApp
from trove.guestagent.datastore.mysql.service import MySqlAppStatus
from trove.guestagent.datastore.mysql.service import MySqlRootAccess
import trove.guestagent.datastore.mysql.service_base as dbaas_base
import trove.guestagent.datastore.service as base_datastore_service
from trove.guestagent.datastore.service import BaseDbStatus
from trove.guestagent.db import models
from trove.guestagent import dbaas as dbaas_sr
@ -103,6 +105,7 @@ class FakeAppStatus(BaseDbStatus):
def __init__(self, id, status):
self.id = id
self.status = status
self.next_fake_status = status
def _get_actual_db_status(self):
@ -1780,6 +1783,15 @@ class BaseDbStatusTest(testtools.TestCase):
InstanceServiceStatus.create(instance_id=self.FAKE_ID,
status=rd_instance.ServiceStatuses.NEW)
dbaas.CONF.guest_id = self.FAKE_ID
patcher_log = patch.object(base_datastore_service, 'LOG')
patcher_context = patch.object(trove_context, 'TroveContext')
patcher_api = patch.object(conductor_api, 'API')
patcher_log.start()
patcher_context.start()
patcher_api.start()
self.addCleanup(patcher_log.stop)
self.addCleanup(patcher_context.stop)
self.addCleanup(patcher_api.stop)
def tearDown(self):
super(BaseDbStatusTest, self).tearDown()
@ -1788,103 +1800,135 @@ class BaseDbStatusTest(testtools.TestCase):
dbaas.CONF.guest_id = None
def test_begin_install(self):
base_db_status = BaseDbStatus()
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus.begin_install()
base_db_status.begin_install()
self.assertEqual(rd_instance.ServiceStatuses.BUILDING,
self.baseDbStatus.status)
base_db_status.status)
def test_begin_restart(self):
base_db_status = BaseDbStatus()
base_db_status.restart_mode = False
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus.restart_mode = False
base_db_status.begin_restart()
self.baseDbStatus.begin_restart()
self.assertTrue(self.baseDbStatus.restart_mode)
self.assertTrue(base_db_status.restart_mode)
def test_end_install_or_restart(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus._get_actual_db_status = Mock(
base_db_status = BaseDbStatus()
base_db_status._get_actual_db_status = Mock(
return_value=rd_instance.ServiceStatuses.SHUTDOWN)
self.baseDbStatus.end_install_or_restart()
base_db_status.end_install_or_restart()
self.assertEqual(rd_instance.ServiceStatuses.SHUTDOWN,
self.baseDbStatus.status)
self.assertFalse(self.baseDbStatus.restart_mode)
base_db_status.status)
self.assertFalse(base_db_status.restart_mode)
def test_is_installed(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus.status = rd_instance.ServiceStatuses.RUNNING
base_db_status = BaseDbStatus()
base_db_status.status = rd_instance.ServiceStatuses.RUNNING
self.assertTrue(self.baseDbStatus.is_installed)
self.assertTrue(base_db_status.is_installed)
def test_is_installed_none(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus.status = None
base_db_status = BaseDbStatus()
base_db_status.status = None
self.assertTrue(self.baseDbStatus.is_installed)
self.assertTrue(base_db_status.is_installed)
def test_is_installed_building(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus.status = rd_instance.ServiceStatuses.BUILDING
base_db_status = BaseDbStatus()
base_db_status.status = rd_instance.ServiceStatuses.BUILDING
self.assertFalse(self.baseDbStatus.is_installed)
self.assertFalse(base_db_status.is_installed)
def test_is_installed_new(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus.status = rd_instance.ServiceStatuses.NEW
base_db_status = BaseDbStatus()
base_db_status.status = rd_instance.ServiceStatuses.NEW
self.assertFalse(self.baseDbStatus.is_installed)
self.assertFalse(base_db_status.is_installed)
def test_is_installed_failed(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus.status = rd_instance.ServiceStatuses.FAILED
base_db_status = BaseDbStatus()
base_db_status.status = rd_instance.ServiceStatuses.FAILED
self.assertFalse(self.baseDbStatus.is_installed)
self.assertFalse(base_db_status.is_installed)
def test_is_restarting(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus.restart_mode = True
base_db_status = BaseDbStatus()
base_db_status.restart_mode = True
self.assertTrue(self.baseDbStatus._is_restarting)
self.assertTrue(base_db_status._is_restarting)
def test_is_running(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus.status = rd_instance.ServiceStatuses.RUNNING
base_db_status = BaseDbStatus()
base_db_status.status = rd_instance.ServiceStatuses.RUNNING
self.assertTrue(self.baseDbStatus.is_running)
self.assertTrue(base_db_status.is_running)
def test_is_running_not(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus.status = rd_instance.ServiceStatuses.SHUTDOWN
base_db_status = BaseDbStatus()
base_db_status.status = rd_instance.ServiceStatuses.SHUTDOWN
self.assertFalse(self.baseDbStatus.is_running)
self.assertFalse(base_db_status.is_running)
def test_wait_for_real_status_to_change_to(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus._get_actual_db_status = Mock(
base_db_status = BaseDbStatus()
base_db_status._get_actual_db_status = Mock(
return_value=rd_instance.ServiceStatuses.RUNNING)
time.sleep = Mock()
self.assertTrue(self.baseDbStatus.
self.assertTrue(base_db_status.
wait_for_real_status_to_change_to
(rd_instance.ServiceStatuses.RUNNING, 10))
def test_wait_for_real_status_to_change_to_timeout(self):
self.baseDbStatus = BaseDbStatus()
self.baseDbStatus._get_actual_db_status = Mock(
base_db_status = BaseDbStatus()
base_db_status._get_actual_db_status = Mock(
return_value=rd_instance.ServiceStatuses.RUNNING)
time.sleep = Mock()
self.assertFalse(self.baseDbStatus.
self.assertFalse(base_db_status.
wait_for_real_status_to_change_to
(rd_instance.ServiceStatuses.SHUTDOWN, 10))
def _test_set_status(self, initial_status, new_status,
expected_status, force=False):
base_db_status = BaseDbStatus()
base_db_status.status = initial_status
base_db_status.set_status(new_status, force=force)
self.assertEqual(expected_status,
base_db_status.status)
def test_set_status_force_heartbeat(self):
self._test_set_status(rd_instance.ServiceStatuses.BUILDING,
rd_instance.ServiceStatuses.RUNNING,
rd_instance.ServiceStatuses.RUNNING,
force=True)
def test_set_status_skip_heartbeat_with_building(self):
self._test_set_status(rd_instance.ServiceStatuses.BUILDING,
rd_instance.ServiceStatuses.RUNNING,
rd_instance.ServiceStatuses.BUILDING)
def test_set_status_skip_heartbeat_with_new(self):
self._test_set_status(rd_instance.ServiceStatuses.NEW,
rd_instance.ServiceStatuses.RUNNING,
rd_instance.ServiceStatuses.NEW)
def test_set_status_to_failed(self):
self._test_set_status(rd_instance.ServiceStatuses.BUILDING,
rd_instance.ServiceStatuses.FAILED,
rd_instance.ServiceStatuses.FAILED)
def test_set_status_to_build_pending(self):
self._test_set_status(rd_instance.ServiceStatuses.BUILDING,
rd_instance.ServiceStatuses.BUILD_PENDING,
rd_instance.ServiceStatuses.BUILD_PENDING)
class MySqlAppStatusTest(testtools.TestCase):