Merge "[OVN] Improve Hash Ring logs"
This commit is contained in:
commit
cbb89fdb14
|
@ -33,6 +33,7 @@ class StandardAttributeIDNotFound(n_exc.NeutronException):
|
|||
|
||||
|
||||
class HashRingIsEmpty(n_exc.NeutronException):
|
||||
message = _('Hash Ring returned empty when hashing "%(key)s". '
|
||||
'This should never happen in a normal situation, please '
|
||||
'check the status of your cluster')
|
||||
message = _('Hash Ring returned empty when hashing "%(key)s". All '
|
||||
'%(node_count)d nodes were found offline. This should never '
|
||||
'happen in a normal situation, please check the status '
|
||||
'of your cluster')
|
||||
|
|
|
@ -38,6 +38,7 @@ class HashRingManager(object):
|
|||
# Flag to rate limit the caching log
|
||||
self._prev_num_nodes = -1
|
||||
self.admin_ctx = context.get_admin_context()
|
||||
self._offline_node_count = 0
|
||||
|
||||
@property
|
||||
def _wait_startup_before_caching(self):
|
||||
|
@ -92,6 +93,11 @@ class HashRingManager(object):
|
|||
self._hash_ring = hashring.HashRing({node.node_uuid
|
||||
for node in nodes})
|
||||
self._last_time_loaded = timeutils.utcnow()
|
||||
self._offline_node_count = db_hash_ring.count_offline_nodes(
|
||||
self.admin_ctx, constants.HASH_RING_NODES_TIMEOUT,
|
||||
self._group)
|
||||
LOG.debug("Hash Ring loaded. %d active nodes. %d offline nodes",
|
||||
len(nodes), self._offline_node_count)
|
||||
|
||||
def refresh(self):
|
||||
self._load_hash_ring(refresh=True)
|
||||
|
@ -108,4 +114,5 @@ class HashRingManager(object):
|
|||
# KeyError is raised
|
||||
return self._hash_ring[key].pop()
|
||||
except KeyError:
|
||||
raise exceptions.HashRingIsEmpty(key=key)
|
||||
raise exceptions.HashRingIsEmpty(
|
||||
key=key, node_count=self._offline_node_count)
|
||||
|
|
|
@ -17,12 +17,14 @@ import datetime
|
|||
|
||||
from neutron_lib.db import api as db_api
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log
|
||||
from oslo_utils import timeutils
|
||||
from oslo_utils import uuidutils
|
||||
|
||||
from neutron.db.models import ovn as ovn_models
|
||||
|
||||
CONF = cfg.CONF
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
# NOTE(ralonsoh): this was migrated from networking-ovn to neutron and should
|
||||
|
@ -34,6 +36,8 @@ def add_node(context, group_name, node_uuid=None):
|
|||
with db_api.CONTEXT_WRITER.using(context):
|
||||
context.session.add(ovn_models.OVNHashRing(
|
||||
node_uuid=node_uuid, hostname=CONF.host, group_name=group_name))
|
||||
LOG.info('Node %s from host "%s" and group "%s" added to the Hash Ring',
|
||||
node_uuid, CONF.host, group_name)
|
||||
return node_uuid
|
||||
|
||||
|
||||
|
@ -42,6 +46,8 @@ def remove_nodes_from_host(context, group_name):
|
|||
context.session.query(ovn_models.OVNHashRing).filter(
|
||||
ovn_models.OVNHashRing.hostname == CONF.host,
|
||||
ovn_models.OVNHashRing.group_name == group_name).delete()
|
||||
LOG.info('Nodes from host "%s" and group "%s" removed from the Hash Ring',
|
||||
CONF.host, group_name)
|
||||
|
||||
|
||||
def _touch(context, **filter_args):
|
||||
|
@ -58,12 +64,30 @@ def touch_node(context, node_uuid):
|
|||
_touch(context, node_uuid=node_uuid)
|
||||
|
||||
|
||||
def get_active_nodes(context, interval, group_name, from_host=False):
|
||||
def _get_nodes_query(context, interval, group_name, offline=False,
|
||||
from_host=False):
|
||||
limit = timeutils.utcnow() - datetime.timedelta(seconds=interval)
|
||||
with db_api.CONTEXT_READER.using(context):
|
||||
query = context.session.query(ovn_models.OVNHashRing).filter(
|
||||
ovn_models.OVNHashRing.updated_at >= limit,
|
||||
ovn_models.OVNHashRing.group_name == group_name)
|
||||
|
||||
if offline:
|
||||
query = query.filter(ovn_models.OVNHashRing.updated_at < limit)
|
||||
else:
|
||||
query = query.filter(ovn_models.OVNHashRing.updated_at >= limit)
|
||||
|
||||
if from_host:
|
||||
query = query.filter_by(hostname=CONF.host)
|
||||
return query.all()
|
||||
|
||||
return query
|
||||
|
||||
|
||||
def get_active_nodes(context, interval, group_name, from_host=False):
|
||||
query = _get_nodes_query(context, interval, group_name,
|
||||
from_host=from_host)
|
||||
return query.all()
|
||||
|
||||
|
||||
def count_offline_nodes(context, interval, group_name):
|
||||
query = _get_nodes_query(context, interval, group_name, offline=True)
|
||||
return query.count()
|
||||
|
|
|
@ -242,3 +242,30 @@ class TestHashRing(testlib_api.SqlTestCaseLight):
|
|||
for node in group2:
|
||||
node_db = self._get_node_row(node)
|
||||
self.assertEqual(node_db.created_at, node_db.updated_at)
|
||||
|
||||
def test_count_offline_nodes(self):
|
||||
self._add_nodes_and_assert_exists(count=3)
|
||||
|
||||
# Assert no nodes are considered offline
|
||||
self.assertEqual(0, ovn_hash_ring_db.count_offline_nodes(
|
||||
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
|
||||
|
||||
# Subtract 60 seconds from utcnow() and touch the nodes to make
|
||||
# them appear offline
|
||||
fake_utcnow = timeutils.utcnow() - datetime.timedelta(seconds=60)
|
||||
with mock.patch.object(timeutils, 'utcnow') as mock_utcnow:
|
||||
mock_utcnow.return_value = fake_utcnow
|
||||
ovn_hash_ring_db.touch_nodes_from_host(self.admin_ctx,
|
||||
HASH_RING_TEST_GROUP)
|
||||
|
||||
# Now assert that all nodes from our host are seen as offline
|
||||
self.assertEqual(3, ovn_hash_ring_db.count_offline_nodes(
|
||||
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
|
||||
|
||||
# Touch the nodes again without faking utcnow()
|
||||
ovn_hash_ring_db.touch_nodes_from_host(self.admin_ctx,
|
||||
HASH_RING_TEST_GROUP)
|
||||
|
||||
# Assert no nodes are considered offline
|
||||
self.assertEqual(0, ovn_hash_ring_db.count_offline_nodes(
|
||||
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
|
||||
|
|
Loading…
Reference in New Issue