Handle DB failures in servicegroup DB driver
Fix an issue where, when the local conductor is used, the DB driver for
servicegroup does not handle transient DB problems gracefully. This
patch makes the behavior consistent with how messaging timeouts are
handled when a remote conductor is used.
Change-Id: Ie736e7b64ea4f60f78878c8713ce826702f9f05e
Closes-Bug: 1505471
(cherry picked from commit 5252bba03e)
This commit is contained in:
parent
75ff50f232
commit
626139ede7
|
@ -14,6 +14,7 @@
|
|||
# limitations under the License.
|
||||
|
||||
from oslo_config import cfg
|
||||
from oslo_db import exception as db_exception
|
||||
from oslo_log import log as logging
|
||||
import oslo_messaging as messaging
|
||||
from oslo_utils import timeutils
|
||||
|
@ -26,6 +27,7 @@ from nova.servicegroup.drivers import base
|
|||
|
||||
CONF = cfg.CONF
|
||||
CONF.import_opt('service_down_time', 'nova.service')
|
||||
CONF.import_opt('use_local', 'nova.conductor.api', group='conductor')
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
@ -82,6 +84,14 @@ class DbDriver(base.Driver):
|
|||
|
||||
def _report_state(self, service):
|
||||
"""Update the state of this service in the datastore."""
|
||||
|
||||
if CONF.conductor.use_local:
|
||||
# need to catch DB type errors
|
||||
exc_cls = db_exception.DBError # oslo.db exception base class
|
||||
else:
|
||||
# need to catch messaging timeouts
|
||||
exc_cls = messaging.MessagingTimeout
|
||||
|
||||
try:
|
||||
service.service_ref.report_count += 1
|
||||
service.service_ref.save()
|
||||
|
@ -93,9 +103,8 @@ class DbDriver(base.Driver):
|
|||
_LI('Recovered connection to nova-conductor '
|
||||
'for reporting service status.'))
|
||||
|
||||
# because we are communicating over conductor, a failure to
|
||||
# connect is going to be a messaging failure, not a db error.
|
||||
except messaging.MessagingTimeout:
|
||||
# the type of failure depends on use of remote or local conductor
|
||||
except exc_cls:
|
||||
if not getattr(service, 'model_disconnected', False):
|
||||
service.model_disconnected = True
|
||||
LOG.warn(_LW('Lost connection to nova-conductor '
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
|
||||
import datetime
|
||||
import mock
|
||||
from oslo_db import exception as db_exception
|
||||
import oslo_messaging as messaging
|
||||
|
||||
from nova import objects
|
||||
from nova import servicegroup
|
||||
|
@ -84,3 +86,26 @@ class DBServiceGroupTestCase(test.NoDBTestCase):
|
|||
fn(service)
|
||||
upd_mock.assert_called_once_with()
|
||||
self.assertEqual(11, service_ref.report_count)
|
||||
|
||||
@mock.patch.object(objects.Service, 'save')
|
||||
def _test_report_state_error(self, exc_cls, upd_mock):
|
||||
upd_mock.side_effect = exc_cls("service save failed")
|
||||
service_ref = objects.Service(host='fake-host', topic='compute',
|
||||
report_count=10)
|
||||
service = mock.MagicMock(model_disconnected=False,
|
||||
service_ref=service_ref)
|
||||
fn = self.servicegroup_api._driver._report_state
|
||||
fn(service) # fail if exception not caught
|
||||
|
||||
def test_report_state_remote_error_handling(self):
|
||||
# test error handling using remote conductor
|
||||
self.flags(use_local=False, group='conductor')
|
||||
self._test_report_state_error(messaging.MessagingTimeout)
|
||||
|
||||
def test_report_state_local_error_handling(self):
|
||||
# if using local conductor, the db driver must handle DB errors
|
||||
self.flags(use_local=True, group='conductor')
|
||||
|
||||
# mock an oslo.db DBError as it's an exception base class for
|
||||
# oslo.db DB errors (eg DBConnectionError)
|
||||
self._test_report_state_error(db_exception.DBError)
|
||||
|
|
Loading…
Reference in New Issue