Handle DB failures in servicegroup DB driver

Fix an issue where, when a local conductor is used, the DB driver for
servicegroup does not handle transient DB problems gracefully.  The
patch makes the behavior consistent with the way messaging timeouts are
handled when a remote conductor is used.

Change-Id: Ie736e7b64ea4f60f78878c8713ce826702f9f05e
Closes-Bug: 1505471
This commit is contained in:
Brian Elliott 2015-10-13 02:24:38 +00:00 committed by John Garbutt
parent 4b0d117f0a
commit 5252bba03e
2 changed files with 37 additions and 3 deletions

View File

@ -14,6 +14,7 @@
# limitations under the License.
from oslo_config import cfg
from oslo_db import exception as db_exception
from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_utils import timeutils
@ -26,6 +27,7 @@ from nova.servicegroup.drivers import base
CONF = cfg.CONF
CONF.import_opt('service_down_time', 'nova.service')
CONF.import_opt('use_local', 'nova.conductor.api', group='conductor')
LOG = logging.getLogger(__name__)
@ -82,6 +84,14 @@ class DbDriver(base.Driver):
def _report_state(self, service):
"""Update the state of this service in the datastore."""
if CONF.conductor.use_local:
# need to catch DB type errors
exc_cls = db_exception.DBError # oslo.db exception base class
else:
# need to catch messaging timeouts
exc_cls = messaging.MessagingTimeout
try:
service.service_ref.report_count += 1
service.service_ref.save()
@ -93,9 +103,8 @@ class DbDriver(base.Driver):
_LI('Recovered connection to nova-conductor '
'for reporting service status.'))
# because we are communicating over conductor, a failure to
# connect is going to be a messaging failure, not a db error.
except messaging.MessagingTimeout:
# the type of failure depends on use of remote or local conductor
except exc_cls:
if not getattr(service, 'model_disconnected', False):
service.model_disconnected = True
LOG.warn(_LW('Lost connection to nova-conductor '

View File

@ -14,6 +14,8 @@
import datetime
import mock
from oslo_db import exception as db_exception
import oslo_messaging as messaging
from nova import objects
from nova import servicegroup
@ -84,3 +86,26 @@ class DBServiceGroupTestCase(test.NoDBTestCase):
fn(service)
upd_mock.assert_called_once_with()
self.assertEqual(11, service_ref.report_count)
@mock.patch.object(objects.Service, 'save')
def _test_report_state_error(self, exc_cls, upd_mock):
    """Check that _report_state absorbs a failing service save.

    :param exc_cls: exception class raised by the patched
                    objects.Service.save
    :param upd_mock: mock injected by the patch decorator in place of
                     objects.Service.save
    """
    upd_mock.side_effect = exc_cls("service save failed")
    service_ref = objects.Service(host='fake-host', topic='compute',
                                  report_count=10)
    service = mock.MagicMock(model_disconnected=False,
                             service_ref=service_ref)
    fn = self.servicegroup_api._driver._report_state
    fn(service)  # fail if exception not caught
    # Not raising is not enough on its own: make sure the save was really
    # attempted and that the driver's error path flagged the service as
    # disconnected.
    upd_mock.assert_called_once_with()
    self.assertTrue(service.model_disconnected)
def test_report_state_remote_error_handling(self):
    """Run the shared error check with a remote conductor configured."""
    self.flags(group='conductor', use_local=False)
    # with a remote conductor the failure surfaces as a messaging timeout
    timeout_cls = messaging.MessagingTimeout
    self._test_report_state_error(timeout_cls)
def test_report_state_local_error_handling(self):
    """Run the shared error check with a local conductor configured."""
    # with a local conductor the db driver itself must cope with DB errors
    self.flags(group='conductor', use_local=True)
    # DBError is used because it is the base class of the oslo.db
    # exceptions (eg DBConnectionError)
    db_error_cls = db_exception.DBError
    self._test_report_state_error(db_error_cls)