diff --git a/ironic_inspector/common/ironic.py b/ironic_inspector/common/ironic.py index 5e3684b81..a0a6ad734 100644 --- a/ironic_inspector/common/ironic.py +++ b/ironic_inspector/common/ironic.py @@ -17,6 +17,7 @@ from ironicclient import client from ironicclient import exceptions as ironic_exc import netaddr from oslo_config import cfg +import retrying from ironic_inspector.common.i18n import _ from ironic_inspector.common import keystone @@ -162,3 +163,16 @@ def get_node(node_id, ironic=None, **kwargs): except ironic_exc.HttpError as exc: raise utils.Error(_("Cannot get node %(node)s: %(exc)s") % {'node': node_id, 'exc': exc}) + + +@retrying.retry( + retry_on_exception=lambda exc: isinstance(exc, ironic_exc.ClientException), + stop_max_attempt_number=5, wait_fixed=1000) +def call_with_retries(func, *args, **kwargs): + """Call an ironic client function retrying all errors. + + If an ironic client exception is raised, try calling the func again, + at most 5 times, waiting 1 sec between each call. If on the 5th attempt + the func raises again, the exception is propagated to the caller. + """ + return func(*args, **kwargs) diff --git a/ironic_inspector/pxe_filter/dnsmasq.py b/ironic_inspector/pxe_filter/dnsmasq.py index e46f0344c..e4acba7a2 100644 --- a/ironic_inspector/pxe_filter/dnsmasq.py +++ b/ironic_inspector/pxe_filter/dnsmasq.py @@ -66,7 +66,8 @@ class DnsmasqFilter(pxe_filter.BaseFilter): timestamp_start = timeutils.utcnow() active_macs = node_cache.active_macs() ironic_macs = set(port.address for port in - ironic.port.list(limit=0, fields=['address'])) + ir_utils.call_with_retries(ironic.port.list, limit=0, + fields=['address'])) blacklist_macs = _get_blacklist() # NOTE(milan) whitelist MACs of ports not kept in ironic anymore # also whitelist active MACs that are still blacklisted in the diff --git a/ironic_inspector/pxe_filter/iptables.py b/ironic_inspector/pxe_filter/iptables.py index e2afdbd0a..44ba878c7 100644 --- a/ironic_inspector/pxe_filter/iptables.py +++ b/ironic_inspector/pxe_filter/iptables.py @@ -19,6 +19,7 @@ from eventlet.green import subprocess from oslo_config import cfg from oslo_log import log +from ironic_inspector.common import ironic as ir_utils from ironic_inspector import node_cache from ironic_inspector.pxe_filter import base as pxe_filter @@ -225,9 +226,9 @@ def _ib_mac_to_rmac_mapping(ports): def _get_blacklist(ironic): - ports = [ - port.address for port in ironic.port.list( - limit=0, fields=['address', 'extra']) - if port.address not in node_cache.active_macs()] + ports = [port.address for port in + ir_utils.call_with_retries(ironic.port.list, limit=0, + fields=['address', 'extra']) + if port.address not in node_cache.active_macs()] _ib_mac_to_rmac_mapping(ports) return ports diff --git a/ironic_inspector/test/unit/test_common_ironic.py b/ironic_inspector/test/unit/test_common_ironic.py index 5aa63b3c3..6afb908da 100644 --- a/ironic_inspector/test/unit/test_common_ironic.py +++ b/ironic_inspector/test/unit/test_common_ironic.py @@ -15,6 +15,7 @@ import socket import unittest from ironicclient import client +from ironicclient import exc as ironic_exc import mock from oslo_config import cfg @@ -144,3 +145,41 @@ class TestCapabilities(unittest.TestCase): output = ir_utils.dict_to_capabilities(capabilities_dict) self.assertIn('cat:meow', output) self.assertIn('dog:wuff', output) + + +class TestCallWithRetries(unittest.TestCase): + def setUp(self): + super(TestCallWithRetries, self).setUp() + self.call = mock.Mock(spec=[]) + + def test_no_retries_on_success(self): + result = ir_utils.call_with_retries(self.call, 'meow', answer=42) + self.assertEqual(result, self.call.return_value) + self.call.assert_called_once_with('meow', answer=42) + + def test_no_retries_on_python_error(self): + self.call.side_effect = RuntimeError('boom') + self.assertRaisesRegexp(RuntimeError, 'boom', + ir_utils.call_with_retries, + self.call, 'meow', answer=42) + self.call.assert_called_once_with('meow', answer=42) + + @mock.patch('time.sleep', lambda _x: None) + def test_retries_on_ironicclient_error(self): + self.call.side_effect = [ + ironic_exc.ClientException('boom') + ] * 3 + [mock.sentinel.result] + + result = ir_utils.call_with_retries(self.call, 'meow', answer=42) + self.assertEqual(result, mock.sentinel.result) + self.call.assert_called_with('meow', answer=42) + self.assertEqual(4, self.call.call_count) + + @mock.patch('time.sleep', lambda _x: None) + def test_retries_on_ironicclient_error_with_failure(self): + self.call.side_effect = ironic_exc.ClientException('boom') + self.assertRaisesRegexp(ironic_exc.ClientException, 'boom', + ir_utils.call_with_retries, + self.call, 'meow', answer=42) + self.call.assert_called_with('meow', answer=42) + self.assertEqual(5, self.call.call_count) diff --git a/ironic_inspector/test/unit/test_dnsmasq_pxe_filter.py b/ironic_inspector/test/unit/test_dnsmasq_pxe_filter.py index 32e554978..4b5e5f521 100644 --- a/ironic_inspector/test/unit/test_dnsmasq_pxe_filter.py +++ b/ironic_inspector/test/unit/test_dnsmasq_pxe_filter.py @@ -15,6 +15,7 @@ import datetime import os import fixtures +from ironicclient import exc as ironic_exc import mock from oslo_config import cfg import six @@ -299,6 +300,29 @@ class TestSync(DnsmasqTestBase): self.timestamp_end - self.timestamp_start) ]) + @mock.patch('time.sleep', lambda _x: None) + def test__sync_with_port_list_retries(self): + self.mock_ironic.port.list.side_effect = [ + ironic_exc.ConnectionRefused('boom'), + [mock.Mock(address=address) for address in self.ironic_macs] + ] + self.driver._sync(self.mock_ironic) + + self.mock__whitelist_mac.assert_has_calls([mock.call('active_mac'), + mock.call('gone_mac')], + any_order=True) + self.mock__blacklist_mac.assert_has_calls([mock.call('new_mac')], + any_order=True) + self.mock_ironic.port.list.assert_called_with(limit=0, + fields=['address']) + self.mock_active_macs.assert_called_once_with() + self.mock__get_blacklist.assert_called_once_with() + self.mock_log.debug.assert_has_calls([ + mock.call('Syncing the driver'), + mock.call('The dnsmasq PXE filter was synchronized (took %s)', + self.timestamp_end - self.timestamp_start) + ]) + class Test_Execute(test_base.BaseTest): def setUp(self): diff --git a/ironic_inspector/test/unit/test_iptables.py b/ironic_inspector/test/unit/test_iptables.py index 8b334e875..4602a94fd 100644 --- a/ironic_inspector/test/unit/test_iptables.py +++ b/ironic_inspector/test/unit/test_iptables.py @@ -14,6 +14,7 @@ # under the License. import fixtures +from ironicclient import exc as ironic_exc import mock from oslo_config import cfg @@ -354,3 +355,21 @@ class TestGetBlacklist(test_base.BaseTest): self.mock_ironic.port.list.assert_called_once_with( limit=0, fields=['address', 'extra']) self.mock__ib_mac_to_rmac_mapping.assert_called_once_with(ports) + + @mock.patch('time.sleep', lambda _x: None) + def test_retry_on_port_list_failure(self): + self.mock_ironic.port.list.side_effect = [ + ironic_exc.ConnectionRefused('boom'), + [ + mock.Mock(address='foo'), + mock.Mock(address='bar'), + ] + ] + self.mock_active_macs.return_value = {'foo'} + + ports = iptables._get_blacklist(self.mock_ironic) + # foo is an active address so we expect the blacklist contains only bar + self.assertEqual(['bar'], ports) + self.mock_ironic.port.list.assert_called_with( + limit=0, fields=['address', 'extra']) + self.mock__ib_mac_to_rmac_mapping.assert_called_once_with(ports) diff --git a/releasenotes/notes/port-list-retry-745d1cf41780e961.yaml b/releasenotes/notes/port-list-retry-745d1cf41780e961.yaml new file mode 100644 index 000000000..ec8ebb238 --- /dev/null +++ b/releasenotes/notes/port-list-retry-745d1cf41780e961.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + The periodic PXE filter update task now retries fetching port list from + the Bare Metal service 5 times (with 1 second delay) before giving up. + This ensures that a temporary networking glitch will not result in + the ironic-inspector service stopping. +upgrade: + - | + Adds dependency on the `retrying `_ + python library. diff --git a/requirements.txt b/requirements.txt index 10d82ee0d..c3b44207d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,6 +29,7 @@ oslo.policy>=1.30.0 # Apache-2.0 oslo.rootwrap>=5.8.0 # Apache-2.0 oslo.serialization!=2.19.1,>=2.18.0 # Apache-2.0 oslo.utils>=3.33.0 # Apache-2.0 +retrying!=1.3.0,>=1.2.3 # Apache-2.0 six>=1.10.0 # MIT stevedore>=1.20.0 # Apache-2.0 SQLAlchemy!=1.1.5,!=1.1.6,!=1.1.7,!=1.1.8,>=1.0.10 # MIT