Limit nodes by ironic shard key

Ironic in API 1.82 added the option for nodes to be associated with
a specific shard key. This can be used to partition up the nodes within
a single ironic conductor group into smaller sets of nodes that can
each be managed by their own nova-compute ironic service.

We add a new [ironic]shard config option to allow operators to say
which shard each nova-compute process should target.
As such, when the shard is set we ignore the peer_list setting
and always have a hash ring of one.

Also corrects an issue where [ironic]/conductor_group was considered
a mutable configuration; it is not mutable, nor is ``shard``. In any
situation where an operator changes the scope of nodes managed by a
nova compute process, a restart is required.

blueprint ironic-shards
Co-Authored-By: Jay Faulkner <jay@jvf.cc>

Change-Id: Ie0c71f7bc5a62d607ffd3134837299fee952a947
This commit is contained in:
John Garbutt 2023-06-26 11:01:21 +01:00 committed by Jay Faulkner
parent 9a9ab2128b
commit f1a4857d61
4 changed files with 146 additions and 17 deletions

View File

@@ -82,6 +82,17 @@ Related options:
'service. Note that setting this to the empty string (``""``) '
'will match the default conductor group, and is different than '
'leaving the option unset.'),
cfg.StrOpt(
'shard',
default=None,
max_length=255,
regex=r'^[a-zA-Z0-9_.-]+$',
help='Specify which ironic shard this nova-compute will manage. '
'This allows you to shard Ironic nodes between compute '
'services across conductors and conductor groups. '
'When a shard is set, the peer_list configuration is ignored. '
'We require that there is at most one nova-compute service '
'for each shard.'),
cfg.ListOpt(
'peer_list',
deprecated_for_removal=True,
@@ -90,7 +101,6 @@ Related options:
We do not recommend using nova-compute HA, please use passive
failover of a single nova-compute service instead.""",
default=[],
mutable=True,
help='List of hostnames for all nova-compute services (including '
'this host) with this conductor_group config value. '
'Nodes matching the conductor_group value will be distributed '

View File

@@ -2933,6 +2933,31 @@ class HashRingTestCase(test.NoDBTestCase):
uncalled=['host3'])
mock_can_send.assert_called_once_with('1.46')
@mock.patch.object(ironic_driver.IronicDriver, '_can_send_version')
def test__refresh_hash_ring_peer_list_shard(self, mock_can_send):
    """When [ironic]shard is set, the ring contains only this host.

    The shard config short-circuits peer discovery, so no Ironic API
    microversion probe should ever be made.
    """
    all_services = ['host1', 'host2', 'host3']
    self.mock_is_up.return_value = True
    self.flags(host='host1')
    self.flags(shard='shard1', group='ironic')
    # Only the local host may join the ring; the other services are
    # never even looked up.
    self._test__refresh_hash_ring(
        all_services, {'host1'}, uncalled=['host2', 'host3'])
    mock_can_send.assert_not_called()
@mock.patch.object(ironic_driver.IronicDriver, '_can_send_version')
def test__refresh_hash_ring_peer_list_shard_and_cg(self, mock_can_send):
    """Shard config wins over conductor_group and peer_list.

    Even with a conductor_group and an explicit peer_list configured,
    a set shard pins the hash ring to just this host and skips the
    API version probe entirely.
    """
    all_services = ['host1', 'host2', 'host3']
    self.mock_is_up.return_value = True
    self.flags(host='host1')
    self.flags(shard='shard1', group='ironic')
    self.flags(conductor_group='not-none', group='ironic')
    # Note that this is getting ignored, because the shard is set
    self.flags(peer_list=['host1', 'host2'], group='ironic')
    self._test__refresh_hash_ring(
        all_services, {'host1'}, uncalled=['host2', 'host3'])
    mock_can_send.assert_not_called()
@mock.patch.object(ironic_driver.IronicDriver, '_can_send_version')
def test__refresh_hash_ring_peer_list_old_api(self, mock_can_send):
mock_can_send.side_effect = (
@@ -3007,7 +3032,8 @@ class NodeCacheTestCase(test.NoDBTestCase):
def _test__refresh_cache(self, instances, nodes, hosts, mock_instances,
mock_nodes, mock_hosts, mock_hash_ring,
mock_can_send, partition_key=None,
can_send_146=True):
can_send_146=True, shard=None,
can_send_182=True):
mock_instances.return_value = instances
mock_nodes.return_value = nodes
mock_hosts.side_effect = hosts
@@ -3017,12 +3043,18 @@ class NodeCacheTestCase(test.NoDBTestCase):
if not can_send_146:
mock_can_send.side_effect = (
exception.IronicAPIVersionNotAvailable(version='1.46'))
if not can_send_182:
mock_can_send.side_effect = None, (
exception.IronicAPIVersionNotAvailable(version='1.82'))
self.driver.node_cache = {}
self.driver.node_cache_time = None
kwargs = {}
if partition_key is not None and can_send_146:
kwargs['conductor_group'] = partition_key
if shard and can_send_182:
kwargs["shard"] = shard
self.driver._refresh_cache()
@@ -3100,6 +3132,74 @@ class NodeCacheTestCase(test.NoDBTestCase):
expected_cache = {n.id: n for n in nodes}
self.assertEqual(expected_cache, self.driver.node_cache)
def test__refresh_cache_shard(self):
    """Shard-only config: every node in the shard lands in the cache."""
    shard = "shard1"
    self.flags(shard=shard, group='ironic')
    nodes = [
        _get_cached_node(id=uuidutils.generate_uuid(), instance_id=None)
        for _ in range(3)
    ]
    hosts = [self.host] * len(nodes)
    # normal operation, one compute service; no instances yet
    self._test__refresh_cache([], nodes, hosts, shard=shard)
    self.assertEqual(
        {node.id: node for node in nodes}, self.driver.node_cache)
def test__refresh_cache_shard_and_conductor_group(self):
    """Shard and conductor_group combine: both filters hit the API."""
    shard = "shard1"
    partition_key = 'some-group'
    self.flags(shard=shard, group='ironic')
    self.flags(conductor_group=partition_key, group='ironic')
    nodes = [
        _get_cached_node(id=uuidutils.generate_uuid(), instance_id=None)
        for _ in range(3)
    ]
    hosts = [self.host] * len(nodes)
    # normal operation, one compute service; no instances yet
    self._test__refresh_cache(
        [], nodes, hosts, shard=shard, partition_key=partition_key)
    self.assertEqual(
        {node.id: node for node in nodes}, self.driver.node_cache)
def test__refresh_cache_shard_and_conductor_group_skip_shard(self):
    """If API 1.82 is unavailable, the shard filter is dropped.

    The conductor_group filter still applies; only the shard kwarg
    is omitted from the node listing call.
    """
    shard = "shard1"
    partition_key = 'some-group'
    self.flags(shard=shard, group='ironic')
    self.flags(conductor_group=partition_key, group='ironic')
    nodes = [
        _get_cached_node(id=uuidutils.generate_uuid(), instance_id=None)
        for _ in range(3)
    ]
    hosts = [self.host] * len(nodes)
    # normal operation, one compute service; no instances yet
    self._test__refresh_cache(
        [], nodes, hosts, shard=shard, partition_key=partition_key,
        can_send_182=False)
    self.assertEqual(
        {node.id: node for node in nodes}, self.driver.node_cache)
def test__refresh_cache_partition_key_old_api(self):
# normal operation, one compute service
instances = []

View File

@@ -698,12 +698,16 @@ class IronicDriver(virt_driver.ComputeDriver):
return False
def _refresh_hash_ring(self, ctxt):
peer_list = None
# When requesting a shard, we assume each compute service is
# targeting a separate shard, so hard code peer_list to
# just this service
peer_list = None if not CONF.ironic.shard else {CONF.host}
# NOTE(jroll) if this is set, we need to limit the set of other
# compute services in the hash ring to hosts that are currently up
# and specified in the peer_list config option, as there's no way
# to check which conductor_group other compute services are using.
if CONF.ironic.conductor_group is not None:
if peer_list is None and CONF.ironic.conductor_group is not None:
try:
# NOTE(jroll) first we need to make sure the Ironic API can
# filter by conductor_group. If it cannot, limiting to
@@ -766,21 +770,24 @@ class IronicDriver(virt_driver.ComputeDriver):
# attribute. If the API isn't new enough to support conductor groups,
# we fall back to managing all nodes. If it is new enough, we can
# filter it in the API.
# NOTE(johngarbutt) similarly, if shard is set, we also limit the
# nodes that are returned by the shard key
conductor_group = CONF.ironic.conductor_group
if conductor_group is not None:
try:
shard = CONF.ironic.shard
kwargs = {}
try:
if conductor_group is not None:
self._can_send_version('1.46')
nodes = _get_node_list(conductor_group=conductor_group)
LOG.debug('Limiting manageable ironic nodes to conductor '
'group %s', conductor_group)
except exception.IronicAPIVersionNotAvailable:
LOG.error('Required Ironic API version 1.46 is not '
'available to filter nodes by conductor group. '
'All nodes will be eligible to be managed by '
'this compute service.')
nodes = _get_node_list()
else:
nodes = _get_node_list()
kwargs['conductor_group'] = conductor_group
if shard:
self._can_send_version('1.82')
kwargs['shard'] = shard
nodes = _get_node_list(**kwargs)
except exception.IronicAPIVersionNotAvailable:
LOG.error('Required Ironic API version is not '
'available to filter nodes by conductor group '
'and shard.')
nodes = _get_node_list(**kwargs)
# NOTE(saga): As _get_node_list() will take a long
# time to return in large clusters we need to call it before

View File

@@ -0,0 +1,12 @@
---
features:
- |
Ironic nova-compute services can now target a specific shard of ironic
nodes by setting the config ``[ironic]shard``.
This is particularly useful when using active-passive failover to choose
which physical host your ironic nova-compute process runs on,
while ensuring ``[DEFAULT]host`` stays the same for each shard.
You can use this alongside ``[ironic]conductor_group`` to further limit
which ironic nodes are managed by each nova-compute service.
Note that when you use ``[ironic]shard`` the ``[ironic]peer_list``
is hard coded to a single nova-compute service.