From ad8a538094652e9f1e722b29c73e4a454d9b9e16 Mon Sep 17 00:00:00 2001 From: tengqm Date: Tue, 16 Aug 2016 02:19:30 -0400 Subject: [PATCH] Fix service filtering when claiming clusters The health manager is not claiming clusters accurately when service is restarted. The reason is that there are some dead service records to be removed next time one of the engines gets restarted. This patch adds a check about the service's status and filters the service records based on the check result. Change-Id: Id482e74068cc1cb4394c79f14f1c6442cf9f717b Partial-Bug: #1613552 --- senlin/db/sqlalchemy/api.py | 9 ++++---- senlin/db/sqlalchemy/utils.py | 11 ++++++++++ senlin/tests/unit/db/test_registry_api.py | 19 +++++++++++++++++ senlin/tests/unit/db/test_sqlalchemy_utils.py | 21 +++++++++++++++++++ 4 files changed, 56 insertions(+), 4 deletions(-) diff --git a/senlin/db/sqlalchemy/api.py b/senlin/db/sqlalchemy/api.py index 86dee23af..dd08e325c 100644 --- a/senlin/db/sqlalchemy/api.py +++ b/senlin/db/sqlalchemy/api.py @@ -1188,11 +1188,12 @@ def service_get_all(context): # HealthRegistry def registry_claim(context, engine_id): with session_for_write() as session: - q_eng = session.query(models.Service) - svc_ids = [s.id for s in q_eng.all()] - + engines = session.query(models.Service).all() + svc_ids = [e.id for e in engines if not utils.is_service_dead(e)] q_reg = session.query(models.HealthRegistry) - q_reg = q_reg.filter(models.HealthRegistry.engine_id.notin_(svc_ids)) + if svc_ids: + q_reg = q_reg.filter( + models.HealthRegistry.engine_id.notin_(svc_ids)) q_reg.update({'engine_id': engine_id}, synchronize_session=False) result = q_reg.all() return result diff --git a/senlin/db/sqlalchemy/utils.py b/senlin/db/sqlalchemy/utils.py index e44c7ade5..06f12c3d6 100644 --- a/senlin/db/sqlalchemy/utils.py +++ b/senlin/db/sqlalchemy/utils.py @@ -12,6 +12,9 @@ import six +from oslo_config import cfg +from oslo_utils import timeutils + def exact_filter(query, model, filters): """Applies exact match filtering to a query. @@ -73,3 +76,11 @@ def get_sort_params(value, default_key=None): dirs.append('asc') return keys, dirs + + +def is_service_dead(service): + """Check if a given service is dead.""" + cfg.CONF.import_opt("periodic_interval", "senlin.common.config") + max_elapse = 2 * cfg.CONF.periodic_interval + + return timeutils.is_older_than(service.updated_at, max_elapse) diff --git a/senlin/tests/unit/db/test_registry_api.py b/senlin/tests/unit/db/test_registry_api.py index 520134e39..ae949af43 100644 --- a/senlin/tests/unit/db/test_registry_api.py +++ b/senlin/tests/unit/db/test_registry_api.py @@ -10,8 +10,10 @@ # License for the specific language governing permissions and limitations # under the License. +import mock from senlin.db.sqlalchemy import api as db_api +from senlin.db.sqlalchemy import utils as db_utils from senlin.tests.unit.common import base from senlin.tests.unit.common import utils @@ -58,6 +60,23 @@ class DBAPIRegistryTest(base.SenlinTestCase): self.assertEqual('ENGINE_ID', registries[0].engine_id) self.assertEqual('ENGINE_ID', registries[1].engine_id) + @mock.patch.object(db_utils, 'is_service_dead') + def test_registry_claim_with_dead_engine(self, mock_check): + db_api.service_create(self.ctx, 'SERVICE_ID_DEAD') + self._create_registry( + cluster_id='CLUSTER_1', check_type='NODE_STATUS_POLLING', + interval=60, params={}, engine_id='SERVICE_ID') + self._create_registry( + cluster_id='CLUSTER_1', check_type='NODE_STATUS_POLLING', + interval=60, params={}, engine_id='SERVICE_ID_DEAD') + + mock_check.side_effect = [False, True] + + registries = db_api.registry_claim(self.ctx, engine_id='ENGINE_ID') + + self.assertEqual(1, len(registries)) + self.assertEqual('ENGINE_ID', registries[0].engine_id) + def test_registry_delete(self): registry = self._create_registry('CLUSTER_ID', check_type='NODE_STATUS_POLLING', diff --git a/senlin/tests/unit/db/test_sqlalchemy_utils.py b/senlin/tests/unit/db/test_sqlalchemy_utils.py index 9b7d305d9..2dabd72be 100644 --- a/senlin/tests/unit/db/test_sqlalchemy_utils.py +++ b/senlin/tests/unit/db/test_sqlalchemy_utils.py @@ -11,6 +11,8 @@ # under the License. import mock +from oslo_config import cfg +from oslo_utils import timeutils from senlin.db.sqlalchemy import utils from senlin.tests.unit.common import base @@ -95,3 +97,22 @@ class SortParamTest(base.SenlinTestCase): self.assertEqual(['foo', 'bar', 'id'], keys) self.assertEqual(['asc-nullsfirst', 'asc-nullsfirst', 'asc-nullsfirst'], dirs) + + +class ServiceAliveTest(base.SenlinTestCase): + + def test_alive(self): + cfg.CONF.set_override('periodic_interval', 100) + service = mock.Mock(updated_at=timeutils.utcnow()) + + res = utils.is_service_dead(service) + + self.assertFalse(res) + + def test_dead(self): + cfg.CONF.set_override('periodic_interval', 0) + service = mock.Mock(updated_at=timeutils.utcnow()) + + res = utils.is_service_dead(service) + + self.assertTrue(res)