From 766c2738ae16ebbae37f26b17e261f0112616bb5 Mon Sep 17 00:00:00 2001
From: Itsuro Oda
Date: Fri, 9 Jan 2015 08:47:56 +0900
Subject: [PATCH] Enable services on agents with admin_state_up False

Previously, when admin_state_up of an agent was turned to False, all
services on it were disabled. This fix allows existing services on
agents with admin_state_up False to remain available. To preserve the
current behavior, the following configuration parameter is added:

* enable_services_on_agents_with_admin_state_down

If the parameter is True, existing services on agents with
admin_state_up False remain available and no new services are scheduled
to such agents automatically, but services can still be added to them
manually. In other words, when the parameter is True, admin_state_up:
False only stops automatic scheduling.

The default of the parameter is False (the current behavior).

Change-Id: Ifba606a5c1f3f07d717c7695a7a64e16238c2057
Closes-Bug: #1408488
---
 etc/neutron.conf                                   |  8 ++
 .../rpc/agentnotifiers/dhcp_rpc_agent_api.py       |  7 +-
 .../rpc/agentnotifiers/l3_rpc_agent_api.py         |  7 +-
 .../agentnotifiers/metering_rpc_agent_api.py       |  4 +-
 neutron/db/agentschedulers_db.py                   | 29 ++++++-
 neutron/db/l3_agentschedulers_db.py                | 16 ++--
 .../agentnotifiers/test_dhcp_rpc_agent_api.py      | 24 +++++-
 .../unit/openvswitch/test_agent_scheduler.py       | 82 ++++++++++++++++++-
 8 files changed, 158 insertions(+), 19 deletions(-)

diff --git a/etc/neutron.conf b/etc/neutron.conf
index e401f76883e..c68af4e66d9 100644
--- a/etc/neutron.conf
+++ b/etc/neutron.conf
@@ -212,6 +212,14 @@ lock_path = $state_path/lock
 # DHCP agents for configured networks.
 # dhcp_agents_per_network = 1
 
+# Enable services on agents with admin_state_up False.
+# If this option is False, when admin_state_up of an agent is turned to
+# False, services on it will be disabled. If this option is True, services
+# on agents with admin_state_up False remain available, and manual
+# scheduling to such agents is still possible. Agents with admin_state_up
+# False are not selected for automatic scheduling regardless of this option.
+# enable_services_on_agents_with_admin_state_down = False
+
 # =========== end of items for agent scheduler extension =====
 
 # =========== items for l3 extension ==============
diff --git a/neutron/api/rpc/agentnotifiers/dhcp_rpc_agent_api.py b/neutron/api/rpc/agentnotifiers/dhcp_rpc_agent_api.py
index 8c00223fc9d..fb741baffaf 100644
--- a/neutron/api/rpc/agentnotifiers/dhcp_rpc_agent_api.py
+++ b/neutron/api/rpc/agentnotifiers/dhcp_rpc_agent_api.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from oslo_config import cfg
 from oslo_log import log as logging
 import oslo_messaging
 
@@ -75,9 +76,11 @@ class DhcpAgentNotifyAPI(object):
         return new_agents + existing_agents
 
     def _get_enabled_agents(self, context, network, agents, method, payload):
-        """Get the list of agents whose admin_state is UP."""
+        """Get the list of agents who can provide services."""
         network_id = network['id']
-        enabled_agents = [x for x in agents if x.admin_state_up]
+        enabled_agents = agents
+        if not cfg.CONF.enable_services_on_agents_with_admin_state_down:
+            enabled_agents = [x for x in agents if x.admin_state_up]
         active_agents = [x for x in agents if x.is_active]
         len_enabled_agents = len(enabled_agents)
         len_active_agents = len(active_agents)
diff --git a/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py b/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py
index 097e7f88bd7..c0a7160c02b 100644
--- a/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py
+++ b/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py
@@ -22,6 +22,7 @@ from neutron.common import constants
 from neutron.common import rpc as n_rpc
 from neutron.common import topics
 from neutron.common import utils
+from neutron.db import agentschedulers_db
 from neutron.i18n import _LE
 from neutron import manager
 from neutron.plugins.common import constants as service_constants
@@ -51,10 +52,11 @@ class L3AgentNotifyAPI(object):
         adminContext = context if context.is_admin else context.elevated()
         plugin = manager.NeutronManager.get_service_plugins().get(
             service_constants.L3_ROUTER_NAT)
+        state = agentschedulers_db.get_admin_state_up_filter()
         for router_id in router_ids:
             l3_agents = plugin.get_l3_agents_hosting_routers(
                 adminContext, [router_id],
-                admin_state_up=True,
+                admin_state_up=state,
                 active=True)
             if shuffle_agents:
                 random.shuffle(l3_agents)
@@ -78,10 +80,11 @@ class L3AgentNotifyAPI(object):
                         context or context.elevated())
         plugin = manager.NeutronManager.get_service_plugins().get(
             service_constants.L3_ROUTER_NAT)
+        state = agentschedulers_db.get_admin_state_up_filter()
         l3_agents = (plugin.
                      get_l3_agents_hosting_routers(adminContext, [router_id],
-                                                   admin_state_up=True,
+                                                   admin_state_up=state,
                                                    active=True))
         # TODO(murali): replace cast with fanout to avoid performance
         # issues at greater scale.
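As an aside, the following standalone sketch (not part of the patch itself) illustrates the filtering semantics the notifiers above rely on. The helper functions mirror the services_available() and get_admin_state_up_filter() additions to neutron/db/agentschedulers_db.py later in this patch; FakeAgent is a purely illustrative stand-in for an agent record, and oslo.config is assumed to be installed.

    # Standalone illustration only; not part of the patch.
    from oslo_config import cfg

    cfg.CONF.register_opts([
        cfg.BoolOpt('enable_services_on_agents_with_admin_state_down',
                    default=False),
    ])


    def services_available(admin_state_up):
        # With the option enabled, an agent keeps serving even when its
        # admin_state_up is False; otherwise admin_state_up decides.
        if cfg.CONF.enable_services_on_agents_with_admin_state_down:
            return True
        return admin_state_up


    def get_admin_state_up_filter():
        # None means "do not filter on admin_state_up"; True means
        # "only agents with admin_state_up True".
        if cfg.CONF.enable_services_on_agents_with_admin_state_down:
            return None
        return True


    class FakeAgent(object):
        # Illustrative stand-in for an agents_db.Agent row.
        def __init__(self, admin_state_up):
            self.admin_state_up = admin_state_up


    agents = [FakeAgent(True), FakeAgent(False)]

    # Default behaviour: the admin-down agent is filtered out.
    enabled = [a for a in agents if services_available(a.admin_state_up)]
    assert len(enabled) == 1
    assert get_admin_state_up_filter() is True

    # With the option enabled, existing services on the admin-down agent
    # stay available and callers stop filtering on admin_state_up.
    cfg.CONF.set_override(
        'enable_services_on_agents_with_admin_state_down', True)
    enabled = [a for a in agents if services_available(a.admin_state_up)]
    assert len(enabled) == 2
    assert get_admin_state_up_filter() is None

The new unit tests at the end of this patch exercise exactly these two configurations: the option left at its default of False, and the option overridden to True.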
diff --git a/neutron/api/rpc/agentnotifiers/metering_rpc_agent_api.py b/neutron/api/rpc/agentnotifiers/metering_rpc_agent_api.py
index f7640cf8fa0..cdffcdbe26c 100644
--- a/neutron/api/rpc/agentnotifiers/metering_rpc_agent_api.py
+++ b/neutron/api/rpc/agentnotifiers/metering_rpc_agent_api.py
@@ -19,6 +19,7 @@ from neutron.common import constants
 from neutron.common import rpc as n_rpc
 from neutron.common import topics
 from neutron.common import utils
+from neutron.db import agentschedulers_db
 from neutron import manager
 from neutron.plugins.common import constants as service_constants
 
@@ -40,10 +41,11 @@ class MeteringAgentNotifyAPI(object):
             service_constants.L3_ROUTER_NAT)
 
         l3_routers = {}
+        state = agentschedulers_db.get_admin_state_up_filter()
         for router in routers:
             l3_agents = plugin.get_l3_agents_hosting_routers(
                 adminContext, [router['id']],
-                admin_state_up=True,
+                admin_state_up=state,
                 active=True)
             for l3_agent in l3_agents:
                 LOG.debug('Notify metering agent at %(topic)s.%(host)s '
diff --git a/neutron/db/agentschedulers_db.py b/neutron/db/agentschedulers_db.py
index 1a3d56b1b65..39cb3eddd48 100644
--- a/neutron/db/agentschedulers_db.py
+++ b/neutron/db/agentschedulers_db.py
@@ -49,6 +49,15 @@ AGENTS_SCHEDULER_OPTS = [
                        'agents.')),
     cfg.IntOpt('dhcp_agents_per_network', default=1,
                help=_('Number of DHCP agents scheduled to host a network.')),
+    cfg.BoolOpt('enable_services_on_agents_with_admin_state_down',
+                default=False,
+                help=_('Enable services on an agent with admin_state_up '
+                       'False. If this option is False, when admin_state_up '
+                       'of an agent is turned False, services on it will be '
+                       'disabled. Agents with admin_state_up False are not '
+                       'selected for automatic scheduling regardless of this '
+                       'option. But manual scheduling to such agents is '
+                       'available if this option is True.')),
 ]
 
 cfg.CONF.register_opts(AGENTS_SCHEDULER_OPTS)
@@ -314,7 +323,7 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
         with context.session.begin(subtransactions=True):
             agent_db = self._get_agent(context, id)
             if (agent_db['agent_type'] != constants.AGENT_TYPE_DHCP or
-                    not agent_db['admin_state_up']):
+                    not services_available(agent_db['admin_state_up'])):
                 raise dhcpagentscheduler.InvalidDHCPAgent(id=id)
             dhcp_agents = self.get_dhcp_agents_hosting_networks(
                 context, [network_id])
@@ -384,7 +393,7 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
             LOG.debug("DHCP Agent not found on host %s", host)
             return []
 
-        if not agent.admin_state_up:
+        if not services_available(agent.admin_state_up):
             return []
         query = context.session.query(NetworkDhcpAgentBinding.network_id)
         query = query.filter(NetworkDhcpAgentBinding.dhcp_agent_id == agent.id)
@@ -416,3 +425,19 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
     def auto_schedule_networks(self, context, host):
         if self.network_scheduler:
             self.network_scheduler.auto_schedule_networks(self, context, host)
+
+
+# helper functions for readability.
+def services_available(admin_state_up):
+    if cfg.CONF.enable_services_on_agents_with_admin_state_down:
+        # Services are available regardless of admin_state_up
+        return True
+    return admin_state_up
+
+
+def get_admin_state_up_filter():
+    if cfg.CONF.enable_services_on_agents_with_admin_state_down:
+        # Avoid filtering on admin_state_up at all
+        return None
+    # Filter on admin_state_up being True
+    return True
diff --git a/neutron/db/l3_agentschedulers_db.py b/neutron/db/l3_agentschedulers_db.py
index c0c2052eace..50de8e3ef88 100644
--- a/neutron/db/l3_agentschedulers_db.py
+++ b/neutron/db/l3_agentschedulers_db.py
@@ -155,9 +155,10 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
 
         is_wrong_type_or_unsuitable_agent = (
             agent['agent_type'] != constants.AGENT_TYPE_L3 or
-            not agent['admin_state_up'] or
-            not self.get_l3_agent_candidates(context, router, [agent])
-        )
+            not agentschedulers_db.services_available(agent['admin_state_up'])
+            or
+            not self.get_l3_agent_candidates(context, router, [agent],
+                                             ignore_admin_state=True))
 
         if is_wrong_type_or_unsuitable_agent:
             raise l3agentscheduler.InvalidL3Agent(id=agent['id'])
@@ -287,7 +288,7 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
             self, context, host, router_ids):
         agent = self._get_agent_by_type_and_host(
             context, constants.AGENT_TYPE_L3, host)
-        if not agent.admin_state_up:
+        if not agentschedulers_db.services_available(agent.admin_state_up):
             return []
         query = context.session.query(RouterL3AgentBinding.router_id)
         query = query.filter(
@@ -429,11 +430,14 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
                 candidates.append(l3_agent)
         return candidates
 
-    def get_l3_agent_candidates(self, context, sync_router, l3_agents):
+    def get_l3_agent_candidates(self, context, sync_router, l3_agents,
+                                ignore_admin_state=False):
         """Get the valid l3 agents for the router from a list of l3_agents."""
         candidates = []
         for l3_agent in l3_agents:
-            if not l3_agent.admin_state_up:
+            if not ignore_admin_state and not l3_agent.admin_state_up:
+                # ignore_admin_state True comes from manual scheduling,
+                # where the admin_state_up check has already been done.
                 continue
             agent_conf = self.get_configuration_dict(l3_agent)
             router_id = agent_conf.get('router_id', None)
diff --git a/neutron/tests/unit/api/rpc/agentnotifiers/test_dhcp_rpc_agent_api.py b/neutron/tests/unit/api/rpc/agentnotifiers/test_dhcp_rpc_agent_api.py
index 89b41f13620..2704ba1c842 100644
--- a/neutron/tests/unit/api/rpc/agentnotifiers/test_dhcp_rpc_agent_api.py
+++ b/neutron/tests/unit/api/rpc/agentnotifiers/test_dhcp_rpc_agent_api.py
@@ -21,6 +21,7 @@ from oslo_utils import timeutils
 from neutron.api.rpc.agentnotifiers import dhcp_rpc_agent_api
 from neutron.common import utils
 from neutron.db import agents_db
+from neutron.db.agentschedulers_db import cfg
 from neutron.tests import base
 
 
@@ -87,11 +88,14 @@ class TestDhcpAgentNotifyAPI(base.BaseTestCase):
         self.assertEqual(expected_errors, self.mock_log.error.call_count)
 
     def test__get_enabled_agents(self):
-        agent = agents_db.Agent()
-        agent.admin_state_up = True
-        agent.heartbeat_timestamp = timeutils.utcnow()
+        agent1 = agents_db.Agent()
+        agent1.admin_state_up = True
+        agent1.heartbeat_timestamp = timeutils.utcnow()
+        agent2 = agents_db.Agent()
+        agent2.admin_state_up = False
+        agent2.heartbeat_timestamp = timeutils.utcnow()
         network = {'id': 'foo_network_id'}
-        self._test__get_enabled_agents(network, agents=[agent])
+        self._test__get_enabled_agents(network, agents=[agent1])
 
     def test__get_enabled_agents_with_inactive_ones(self):
         agent1 = agents_db.Agent()
@@ -111,6 +115,18 @@ class TestDhcpAgentNotifyAPI(base.BaseTestCase):
         self._test__get_enabled_agents(network, [], port_count=20,
                                        expected_warnings=0, expected_errors=1)
 
+    def test__get_enabled_agents_with_admin_state_down(self):
+        cfg.CONF.set_override(
+            'enable_services_on_agents_with_admin_state_down', True)
+        agent1 = agents_db.Agent()
+        agent1.admin_state_up = True
+        agent1.heartbeat_timestamp = timeutils.utcnow()
+        agent2 = agents_db.Agent()
+        agent2.admin_state_up = False
+        agent2.heartbeat_timestamp = timeutils.utcnow()
+        network = {'id': 'foo_network_id'}
+        self._test__get_enabled_agents(network, agents=[agent1, agent2])
+
     def test__notify_agents_fanout_required(self):
         self.notifier._notify_agents(mock.ANY,
                                      'network_delete_end',
diff --git a/neutron/tests/unit/openvswitch/test_agent_scheduler.py b/neutron/tests/unit/openvswitch/test_agent_scheduler.py
index fcf6717b2ad..feeb8a436d3 100644
--- a/neutron/tests/unit/openvswitch/test_agent_scheduler.py
+++ b/neutron/tests/unit/openvswitch/test_agent_scheduler.py
@@ -584,11 +584,13 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
             expected_code=exc.HTTPForbidden.code,
             admin_context=False)
 
-    def test_network_add_to_dhcp_agent(self):
+    def _test_network_add_to_dhcp_agent(self, admin_state_up=True):
         with self.network() as net1:
             self._register_agent_states()
             hosta_id = self._get_agent_id(constants.AGENT_TYPE_DHCP,
                                           DHCP_HOSTA)
+            if not admin_state_up:
+                self._set_agent_admin_state_up(DHCP_HOSTA, False)
             num_before_add = len(
                 self._list_networks_hosted_by_dhcp_agent(
                     hosta_id)['networks'])
@@ -600,6 +602,14 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
         self.assertEqual(0, num_before_add)
         self.assertEqual(1, num_after_add)
 
+    def test_network_add_to_dhcp_agent(self):
+        self._test_network_add_to_dhcp_agent()
+
+    def test_network_add_to_dhcp_agent_with_admin_state_down(self):
+        cfg.CONF.set_override(
+            'enable_services_on_agents_with_admin_state_down', True)
+        self._test_network_add_to_dhcp_agent(admin_state_up=False)
+
     def test_network_remove_from_dhcp_agent(self):
         dhcp_hosta = {
             'binary': 'neutron-dhcp-agent',
@@ -654,6 +664,39 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
         self.assertEqual(port_list['ports'][0]['device_id'],
                          constants.DEVICE_ID_RESERVED_DHCP_PORT)
 
+    def _test_get_active_networks_from_admin_state_down_agent(self,
+                                                              keep_services):
+        if keep_services:
+            cfg.CONF.set_override(
+                'enable_services_on_agents_with_admin_state_down', True)
+        dhcp_hosta = {
+            'binary': 'neutron-dhcp-agent',
+            'host': DHCP_HOSTA,
+            'topic': 'DHCP_AGENT',
+            'configurations': {'dhcp_driver': 'dhcp_driver',
+                               'use_namespaces': True,
+                               },
+            'agent_type': constants.AGENT_TYPE_DHCP}
+        self._register_one_agent_state(dhcp_hosta)
+        dhcp_rpc_cb = dhcp_rpc.DhcpRpcCallback()
+        with self.port():
+            nets = dhcp_rpc_cb.get_active_networks(self.adminContext,
+                                                   host=DHCP_HOSTA)
+            self.assertEqual(1, len(nets))
+            self._set_agent_admin_state_up(DHCP_HOSTA, False)
+            nets = dhcp_rpc_cb.get_active_networks(self.adminContext,
+                                                   host=DHCP_HOSTA)
+            if keep_services:
+                self.assertEqual(1, len(nets))
+            else:
+                self.assertEqual(0, len(nets))
+
+    def test_dhcp_agent_keep_services_off(self):
+        self._test_get_active_networks_from_admin_state_down_agent(False)
+
+    def test_dhcp_agent_keep_services_on(self):
+        self._test_get_active_networks_from_admin_state_down_agent(True)
+
     def _take_down_agent_and_run_reschedule(self, host):
         # take down the agent on host A and ensure B is alive
         self.adminContext.session.begin(subtransactions=True)
@@ -1106,11 +1149,13 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
                                               None)
         self._delete('routers', router['router']['id'])
 
-    def test_router_add_to_l3_agent(self):
+    def _test_router_add_to_l3_agent(self, admin_state_up=True):
         with self.router() as router1:
             self._register_agent_states()
             hosta_id = self._get_agent_id(constants.AGENT_TYPE_L3,
                                           L3_HOSTA)
+            if not admin_state_up:
+                self._set_agent_admin_state_up(L3_HOSTA, False)
             num_before_add = len(
                 self._list_routers_hosted_by_l3_agent(
                     hosta_id)['routers'])
@@ -1127,6 +1172,14 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
         self.assertEqual(0, num_before_add)
         self.assertEqual(1, num_after_add)
 
+    def test_router_add_to_l3_agent(self):
+        self._test_router_add_to_l3_agent()
+
+    def test_router_add_to_l3_agent_with_admin_state_down(self):
+        cfg.CONF.set_override(
+            'enable_services_on_agents_with_admin_state_down', True)
+        self._test_router_add_to_l3_agent(admin_state_up=False)
+
     def test_router_add_to_l3_agent_two_times(self):
         with self.router() as router1:
             self._register_agent_states()
@@ -1174,6 +1227,31 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
             expected_code=exc.HTTPForbidden.code,
             admin_context=False)
 
+    def _test_sync_routers_from_admin_state_down_agent(self, keep_services):
+        if keep_services:
+            cfg.CONF.set_override(
+                'enable_services_on_agents_with_admin_state_down', True)
+        l3_rpc_cb = l3_rpc.L3RpcCallback()
+        self._register_agent_states()
+        hosta_id = self._get_agent_id(constants.AGENT_TYPE_L3, L3_HOSTA)
+        with self.router() as router:
+            self._add_router_to_l3_agent(hosta_id,
+                                         router['router']['id'])
+            routers = l3_rpc_cb.sync_routers(self.adminContext, host=L3_HOSTA)
+            self.assertEqual(1, len(routers))
+            self._set_agent_admin_state_up(L3_HOSTA, False)
+            routers = l3_rpc_cb.sync_routers(self.adminContext, host=L3_HOSTA)
+            if keep_services:
+                self.assertEqual(1, len(routers))
+            else:
+                self.assertEqual(0, len(routers))
+
+    def test_l3_agent_keep_services_off(self):
+        self._test_sync_routers_from_admin_state_down_agent(False)
+
+    def test_l3_agent_keep_services_on(self):
+        self._test_sync_routers_from_admin_state_down_agent(True)
+
     def test_list_routers_hosted_by_l3_agent_with_invalid_agent(self):
         invalid_agentid = 'non_existing_agent'
         self._list_routers_hosted_by_l3_agent(invalid_agentid,