From 945d662d4f0a0b1b14f32ab92aaf3f0f35e0be4b Mon Sep 17 00:00:00 2001
From: Matt Riedemann
Date: Wed, 4 Dec 2019 09:50:17 -0500
Subject: [PATCH] Handle ServiceNotFound in DbDriver._report_state

If a service record is gone from the DB the _report_state method
will log a generic traceback every time the report interval runs,
which is every 10 seconds by default. This gets pretty noisy and
the error logged isn't very useful.

One could get into this state by deleting the service record in
the API before stopping the actual process that is running.

This simply handles the ServiceNotFound error and logs a more
useful error message without the noisy traceback.

Change-Id: If0336001fbe39922a199756db0803121cbe560af
Related-Bug: #1813147
---
 nova/servicegroup/drivers/db.py                      | 11 +++++++++++
 nova/tests/unit/servicegroup/test_db_servicegroup.py | 11 ++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/nova/servicegroup/drivers/db.py b/nova/servicegroup/drivers/db.py
index 98a6c435e18e..9b134ffebb1b 100644
--- a/nova/servicegroup/drivers/db.py
+++ b/nova/servicegroup/drivers/db.py
@@ -19,6 +19,7 @@ from oslo_utils import timeutils
 import six
 
 import nova.conf
+from nova import exception
 from nova.i18n import _, _LI, _LW, _LE
 from nova.servicegroup import api
 from nova.servicegroup.drivers import base
@@ -103,6 +104,16 @@ class DbDriver(base.Driver):
                 service.model_disconnected = True
                 LOG.warning(_LW('Lost connection to nova-conductor '
                                 'for reporting service status.'))
+        except exception.ServiceNotFound:
+            # The service may have been deleted via the API but the actual
+            # process is still running. Provide a useful error message rather
+            # than the noisy traceback in the generic Exception block below.
+            LOG.error('The services table record for the %s service on '
+                      'host %s is gone. You either need to stop this service '
+                      'if it should be deleted or restart it to recreate the '
+                      'record in the database.',
+                      service.service_ref.binary, service.service_ref.host)
+            service.model_disconnected = True
         except Exception:
             # NOTE(rpodolyaka): we'd like to avoid catching of all possible
             # exceptions here, but otherwise it would become possible for
diff --git a/nova/tests/unit/servicegroup/test_db_servicegroup.py b/nova/tests/unit/servicegroup/test_db_servicegroup.py
index f271f71f4223..9e04451ec790 100644
--- a/nova/tests/unit/servicegroup/test_db_servicegroup.py
+++ b/nova/tests/unit/servicegroup/test_db_servicegroup.py
@@ -17,6 +17,7 @@ import oslo_messaging as messaging
 from oslo_utils import fixture as utils_fixture
 from oslo_utils import timeutils
 
+from nova import exception
 from nova import objects
 from nova import servicegroup
 from nova import test
@@ -95,12 +96,13 @@ class DBServiceGroupTestCase(test.NoDBTestCase):
     def _test_report_state_error(self, exc_cls, upd_mock):
         upd_mock.side_effect = exc_cls("service save failed")
         service_ref = objects.Service(host='fake-host', topic='compute',
-                                      report_count=10)
+                                      report_count=10, binary='nova-compute')
         service = mock.MagicMock(model_disconnected=False,
                                  service_ref=service_ref)
         fn = self.servicegroup_api._driver._report_state
         fn(service)  # fail if exception not caught
         self.assertTrue(service.model_disconnected)
+        return service_ref
 
     def test_report_state_error_handling_timeout(self):
         self._test_report_state_error(messaging.MessagingTimeout)
@@ -108,6 +110,13 @@ class DBServiceGroupTestCase(test.NoDBTestCase):
     def test_report_state_unexpected_error(self):
         self._test_report_state_error(RuntimeError)
 
+    def test_report_state_service_not_found(self):
+        service_ref = self._test_report_state_error(exception.ServiceNotFound)
+        self.assertIn('The services table record for the %s service on '
+                      'host %s is gone.' %
+                      (service_ref.binary, service_ref.host),
+                      self.stdlog.logger.output)
+
     def test_get_updated_time(self):
         retval = "2016-11-02T22:40:31.000000"
         service_ref = {