Limit nodes by ironic shard key

Ironic in API 1.82 added the option for nodes to be associated with
a specific shard key. This can be used to partition up the nodes within
a single ironic conductor group into smaller sets of nodes that can
each be managed by their own nova-compute ironic service.

We add a new [ironic]shard config option to allow operators to say
which shard each nova-compute process should target.
As such, when the shard is set we ignore the peer_list setting
and always have a hash ring of one.

Also corrects an issue where [ironic]/conductor_group was considered
a mutable configuration; it is not mutable, nor is ``shard``. In any
situation where an operator changes the scope of nodes managed by a
nova compute process, a restart is required.

blueprint ironic-shards
Co-Authored-By: Jay Faulkner <jay@jvf.cc>

Change-Id: Ie0c71f7bc5a62d607ffd3134837299fee952a947
This commit is contained in:
John Garbutt 2023-06-26 11:01:21 +01:00 committed by Jay Faulkner
parent 9a9ab2128b
commit f1a4857d61
4 changed files with 146 additions and 17 deletions

View File

@@ -82,6 +82,17 @@ Related options:
'service. Note that setting this to the empty string (``""``) '
'will match the default conductor group, and is different than '
'leaving the option unset.'),
cfg.StrOpt(
'shard',
default=None,
max_length=255,
regex=r'^[a-zA-Z0-9_.-]+$',
help='Specify which ironic shard this nova-compute will manage. '
'This allows you to shard Ironic nodes between compute '
'services across conductors and conductor groups. '
'When a shard is set, the peer_list configuration is ignored. '
'We require that there is at most one nova-compute service '
'for each shard.'),
cfg.ListOpt(
'peer_list',
deprecated_for_removal=True,
@@ -90,7 +101,6 @@ Related options:
We do not recommend using nova-compute HA, please use passive
failover of a single nova-compute service instead.""",
default=[],
mutable=True,
help='List of hostnames for all nova-compute services (including '
'this host) with this conductor_group config value. '
'Nodes matching the conductor_group value will be distributed '

View File

@@ -2933,6 +2933,31 @@ class HashRingTestCase(test.NoDBTestCase):
uncalled=['host3'])
mock_can_send.assert_called_once_with('1.46')
@mock.patch.object(ironic_driver.IronicDriver, '_can_send_version')
def test__refresh_hash_ring_peer_list_shard(self, mock_can_send):
    """When [ironic]shard is set, the ring contains only this host.

    The shard config short-circuits peer discovery, so no Ironic API
    microversion probe should ever be made.
    """
    all_services = ['host1', 'host2', 'host3']
    self.mock_is_up.return_value = True
    self.flags(host='host1')
    self.flags(shard='shard1', group='ironic')
    # Only the local host may join the ring; the other services are
    # never even looked up.
    self._test__refresh_hash_ring(
        all_services, {'host1'}, uncalled=['host2', 'host3'])
    mock_can_send.assert_not_called()
@mock.patch.object(ironic_driver.IronicDriver, '_can_send_version')
def test__refresh_hash_ring_peer_list_shard_and_cg(self, mock_can_send):
    """Shard config wins over conductor_group and peer_list.

    Even with a conductor_group and an explicit peer_list configured,
    a set shard pins the hash ring to just this host and skips the
    API version probe entirely.
    """
    all_services = ['host1', 'host2', 'host3']
    self.mock_is_up.return_value = True
    self.flags(host='host1')
    self.flags(shard='shard1', group='ironic')
    self.flags(conductor_group='not-none', group='ironic')
    # Note that this is getting ignored, because the shard is set
    self.flags(peer_list=['host1', 'host2'], group='ironic')
    self._test__refresh_hash_ring(
        all_services, {'host1'}, uncalled=['host2', 'host3'])
    mock_can_send.assert_not_called()
@mock.patch.object(ironic_driver.IronicDriver, '_can_send_version')
def test__refresh_hash_ring_peer_list_old_api(self, mock_can_send):
mock_can_send.side_effect = (
@@ -3007,7 +3032,8 @@ class NodeCacheTestCase(test.NoDBTestCase):
def _test__refresh_cache(self, instances, nodes, hosts, mock_instances,
mock_nodes, mock_hosts, mock_hash_ring,
mock_can_send, partition_key=None,
can_send_146=True):
can_send_146=True, shard=None,
can_send_182=True):
mock_instances.return_value = instances
mock_nodes.return_value = nodes
mock_hosts.side_effect = hosts
@@ -3017,12 +3043,18 @@ class NodeCacheTestCase(test.NoDBTestCase):
if not can_send_146:
mock_can_send.side_effect = (
exception.IronicAPIVersionNotAvailable(version='1.46'))
if not can_send_182:
mock_can_send.side_effect = None, (
exception.IronicAPIVersionNotAvailable(version='1.82'))
self.driver.node_cache = {}
self.driver.node_cache_time = None
kwargs = {}
if partition_key is not None and can_send_146:
kwargs['conductor_group'] = partition_key
if shard and can_send_182:
kwargs["shard"] = shard
self.driver._refresh_cache()
@@ -3100,6 +3132,74 @@ class NodeCacheTestCase(test.NoDBTestCase):
expected_cache = {n.id: n for n in nodes}
self.assertEqual(expected_cache, self.driver.node_cache)
def test__refresh_cache_shard(self):
    """Shard-only config: every node in the shard lands in the cache."""
    shard = "shard1"
    self.flags(shard=shard, group='ironic')
    nodes = [
        _get_cached_node(id=uuidutils.generate_uuid(), instance_id=None)
        for _ in range(3)
    ]
    hosts = [self.host] * len(nodes)
    # normal operation, one compute service; no instances yet
    self._test__refresh_cache([], nodes, hosts, shard=shard)
    self.assertEqual(
        {node.id: node for node in nodes}, self.driver.node_cache)
def test__refresh_cache_shard_and_conductor_group(self):
    """Shard and conductor_group combine: both filters hit the API."""
    shard = "shard1"
    partition_key = 'some-group'
    self.flags(shard=shard, group='ironic')
    self.flags(conductor_group=partition_key, group='ironic')
    nodes = [
        _get_cached_node(id=uuidutils.generate_uuid(), instance_id=None)
        for _ in range(3)
    ]
    hosts = [self.host] * len(nodes)
    # normal operation, one compute service; no instances yet
    self._test__refresh_cache(
        [], nodes, hosts, shard=shard, partition_key=partition_key)
    self.assertEqual(
        {node.id: node for node in nodes}, self.driver.node_cache)
def test__refresh_cache_shard_and_conductor_group_skip_shard(self):
    """If API 1.82 is unavailable, the shard filter is dropped.

    The conductor_group filter still applies; only the shard kwarg
    is omitted from the node listing call.
    """
    shard = "shard1"
    partition_key = 'some-group'
    self.flags(shard=shard, group='ironic')
    self.flags(conductor_group=partition_key, group='ironic')
    nodes = [
        _get_cached_node(id=uuidutils.generate_uuid(), instance_id=None)
        for _ in range(3)
    ]
    hosts = [self.host] * len(nodes)
    # normal operation, one compute service; no instances yet
    self._test__refresh_cache(
        [], nodes, hosts, shard=shard, partition_key=partition_key,
        can_send_182=False)
    self.assertEqual(
        {node.id: node for node in nodes}, self.driver.node_cache)
def test__refresh_cache_partition_key_old_api(self):
# normal operation, one compute service
instances = []

View File

@@ -698,12 +698,16 @@ class IronicDriver(virt_driver.ComputeDriver):
return False
def _refresh_hash_ring(self, ctxt):
peer_list = None
# When requesting a shard, we assume each compute service is
# targeting a separate shard, so hard code peer_list to
# just this service
peer_list = None if not CONF.ironic.shard else {CONF.host}
# NOTE(jroll) if this is set, we need to limit the set of other
# compute services in the hash ring to hosts that are currently up
# and specified in the peer_list config option, as there's no way
# to check which conductor_group other compute services are using.
if CONF.ironic.conductor_group is not None:
if peer_list is None and CONF.ironic.conductor_group is not None:
try:
# NOTE(jroll) first we need to make sure the Ironic API can
# filter by conductor_group. If it cannot, limiting to
@@ -766,21 +770,24 @@ class IronicDriver(virt_driver.ComputeDriver):
# attribute. If the API isn't new enough to support conductor groups,
# we fall back to managing all nodes. If it is new enough, we can
# filter it in the API.
# NOTE(johngarbutt) similarly, if shard is set, we also limit the
# nodes that are returned by the shard key
conductor_group = CONF.ironic.conductor_group
if conductor_group is not None:
try:
shard = CONF.ironic.shard
kwargs = {}
try:
if conductor_group is not None:
self._can_send_version('1.46')
nodes = _get_node_list(conductor_group=conductor_group)
LOG.debug('Limiting manageable ironic nodes to conductor '
'group %s', conductor_group)
except exception.IronicAPIVersionNotAvailable:
LOG.error('Required Ironic API version 1.46 is not '
'available to filter nodes by conductor group. '
'All nodes will be eligible to be managed by '
'this compute service.')
nodes = _get_node_list()
else:
nodes = _get_node_list()
kwargs['conductor_group'] = conductor_group
if shard:
self._can_send_version('1.82')
kwargs['shard'] = shard
nodes = _get_node_list(**kwargs)
except exception.IronicAPIVersionNotAvailable:
LOG.error('Required Ironic API version is not '
'available to filter nodes by conductor group '
'and shard.')
nodes = _get_node_list(**kwargs)
# NOTE(saga): As _get_node_list() will take a long
# time to return in large clusters we need to call it before

View File

@@ -0,0 +1,12 @@
---
features:
- |
Ironic nova-compute services can now target a specific shard of ironic
nodes by setting the config ``[ironic]shard``.
This is particularly useful when using active-passive failover to choose
which physical host your ironic nova-compute process runs on,
while ensuring ``[DEFAULT]host`` stays the same for each shard.
You can use this alongside ``[ironic]conductor_group`` to further limit
which ironic nodes are managed by each nova-compute service.
Note that when you use ``[ironic]shard`` the ``[ironic]peer_list``
is hard coded to a single nova-compute service.