Merge "[OVN] Improve Hash Ring logs"

This commit is contained in:
Zuul 2023-06-16 13:02:09 +00:00 committed by Gerrit Code Review
commit cbb89fdb14
4 changed files with 66 additions and 7 deletions

View File

@ -33,6 +33,7 @@ class StandardAttributeIDNotFound(n_exc.NeutronException):
class HashRingIsEmpty(n_exc.NeutronException):
message = _('Hash Ring returned empty when hashing "%(key)s". '
'This should never happen in a normal situation, please '
'check the status of your cluster')
message = _('Hash Ring returned empty when hashing "%(key)s". All '
'%(node_count)d nodes were found offline. This should never '
'happen in a normal situation, please check the status '
'of your cluster')

View File

@ -38,6 +38,7 @@ class HashRingManager(object):
# Flag to rate limit the caching log
self._prev_num_nodes = -1
self.admin_ctx = context.get_admin_context()
self._offline_node_count = 0
@property
def _wait_startup_before_caching(self):
@ -92,6 +93,11 @@ class HashRingManager(object):
self._hash_ring = hashring.HashRing({node.node_uuid
for node in nodes})
self._last_time_loaded = timeutils.utcnow()
self._offline_node_count = db_hash_ring.count_offline_nodes(
self.admin_ctx, constants.HASH_RING_NODES_TIMEOUT,
self._group)
LOG.debug("Hash Ring loaded. %d active nodes. %d offline nodes",
len(nodes), self._offline_node_count)
def refresh(self):
self._load_hash_ring(refresh=True)
@ -108,4 +114,5 @@ class HashRingManager(object):
# KeyError is raised
return self._hash_ring[key].pop()
except KeyError:
raise exceptions.HashRingIsEmpty(key=key)
raise exceptions.HashRingIsEmpty(
key=key, node_count=self._offline_node_count)

View File

@ -17,12 +17,14 @@ import datetime
from neutron_lib.db import api as db_api
from oslo_config import cfg
from oslo_log import log
from oslo_utils import timeutils
from oslo_utils import uuidutils
from neutron.db.models import ovn as ovn_models
CONF = cfg.CONF
LOG = log.getLogger(__name__)
# NOTE(ralonsoh): this was migrated from networking-ovn to neutron and should
@ -34,6 +36,8 @@ def add_node(context, group_name, node_uuid=None):
with db_api.CONTEXT_WRITER.using(context):
context.session.add(ovn_models.OVNHashRing(
node_uuid=node_uuid, hostname=CONF.host, group_name=group_name))
LOG.info('Node %s from host "%s" and group "%s" added to the Hash Ring',
node_uuid, CONF.host, group_name)
return node_uuid
@ -42,6 +46,8 @@ def remove_nodes_from_host(context, group_name):
context.session.query(ovn_models.OVNHashRing).filter(
ovn_models.OVNHashRing.hostname == CONF.host,
ovn_models.OVNHashRing.group_name == group_name).delete()
LOG.info('Nodes from host "%s" and group "%s" removed from the Hash Ring',
CONF.host, group_name)
def _touch(context, **filter_args):
@ -58,12 +64,30 @@ def touch_node(context, node_uuid):
_touch(context, node_uuid=node_uuid)
def get_active_nodes(context, interval, group_name, from_host=False):
def _get_nodes_query(context, interval, group_name, offline=False,
from_host=False):
limit = timeutils.utcnow() - datetime.timedelta(seconds=interval)
with db_api.CONTEXT_READER.using(context):
query = context.session.query(ovn_models.OVNHashRing).filter(
ovn_models.OVNHashRing.updated_at >= limit,
ovn_models.OVNHashRing.group_name == group_name)
if offline:
query = query.filter(ovn_models.OVNHashRing.updated_at < limit)
else:
query = query.filter(ovn_models.OVNHashRing.updated_at >= limit)
if from_host:
query = query.filter_by(hostname=CONF.host)
return query.all()
return query
def get_active_nodes(context, interval, group_name, from_host=False):
    """Return the active Hash Ring nodes of a group.

    The query is built by ``_get_nodes_query`` with its default
    ``offline=False``, i.e. it selects the nodes whose ``updated_at`` is not
    older than ``interval`` seconds.

    Both building and executing the query happen inside an explicit reader
    transaction: ``_get_nodes_query`` only returns a query object, so
    without this the rows would be fetched after its own reader context has
    already been left.

    :param context: Neutron request context providing the DB session.
    :param interval: Age limit, in seconds, used to tell active nodes from
                     offline ones.
    :param group_name: Name of the Hash Ring group to look into.
    :param from_host: Forwarded to ``_get_nodes_query``; when true only the
                      nodes registered with ``CONF.host`` are selected.
    :returns: The list produced by ``query.all()``.
    """
    with db_api.CONTEXT_READER.using(context):
        query = _get_nodes_query(context, interval, group_name,
                                 from_host=from_host)
        return query.all()
def count_offline_nodes(context, interval, group_name):
    """Return how many Hash Ring nodes of a group are offline.

    The query is built by ``_get_nodes_query`` with ``offline=True``, i.e.
    it selects the nodes whose ``updated_at`` is older than ``interval``
    seconds.

    Both building and executing the query happen inside an explicit reader
    transaction: ``_get_nodes_query`` only returns a query object, so
    without this the count would be issued after its own reader context has
    already been left.

    :param context: Neutron request context providing the DB session.
    :param interval: Age limit, in seconds, used to tell active nodes from
                     offline ones.
    :param group_name: Name of the Hash Ring group to look into.
    :returns: The integer produced by ``query.count()``.
    """
    with db_api.CONTEXT_READER.using(context):
        query = _get_nodes_query(context, interval, group_name,
                                 offline=True)
        return query.count()

View File

@ -242,3 +242,30 @@ class TestHashRing(testlib_api.SqlTestCaseLight):
for node in group2:
node_db = self._get_node_row(node)
self.assertEqual(node_db.created_at, node_db.updated_at)
def test_count_offline_nodes(self):
    """Check count_offline_nodes() against fresh and stale node rows.

    Three nodes are added; none is offline at first, all three are offline
    after being touched with a timestamp 60 seconds in the past, and none
    is offline again once they are touched with the real current time.
    """
    def offline_count():
        # Same call used by every assertion below.
        return ovn_hash_ring_db.count_offline_nodes(
            self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP)

    self._add_nodes_and_assert_exists(count=3)

    # Freshly added nodes must not be reported as offline
    self.assertEqual(0, offline_count())

    # Touch the nodes while utcnow() is faked to be 60 seconds in the
    # past, so that they appear offline
    stale_time = timeutils.utcnow() - datetime.timedelta(seconds=60)
    with mock.patch.object(timeutils, 'utcnow', return_value=stale_time):
        ovn_hash_ring_db.touch_nodes_from_host(self.admin_ctx,
                                               HASH_RING_TEST_GROUP)

    # All the nodes from our host must now be seen as offline
    self.assertEqual(3, offline_count())

    # Touch the nodes once more, this time with the real utcnow()
    ovn_hash_ring_db.touch_nodes_from_host(self.admin_ctx,
                                           HASH_RING_TEST_GROUP)

    # The nodes are up to date again, so none is offline
    self.assertEqual(0, offline_count())