Enable services on agents with admin_state_up False

Previously, when admin_state_up of an agent was turned to False,
all services on it were disabled.
This fix makes existing services on agents with admin_state_up
False remain available.
To keep the current behavior available, the following configuration
parameter is added.

* enable_services_on_agents_with_admin_state_down

If the parameter is True, existing services on agents with admin_state_up
False remain available. No more services will be scheduled to the agent
automatically, but adding a service to the agent manually is still possible.
That is, when the parameter is True, admin_state_up: False only means that
automatic scheduling stops.
The default of the parameter is False (the current behavior).

Change-Id: Ifba606a5c1f3f07d717c7695a7a64e16238c2057
Closes-Bug: #1408488
This commit is contained in:
Itsuro Oda 2015-01-09 08:47:56 +09:00
parent 822a488976
commit 766c2738ae
8 changed files with 158 additions and 19 deletions

View File

@ -212,6 +212,14 @@ lock_path = $state_path/lock
# DHCP agents for configured networks.
# dhcp_agents_per_network = 1
# Enable services on agents with admin_state_up False.
# If this option is False, when admin_state_up of an agent is turned to
# False, services on it will be disabled. If this option is True, services
# on agents with admin_state_up False remain available, and manual scheduling
# to such agents is possible. Agents with admin_state_up False are not
# selected for automatic scheduling regardless of this option.
# enable_services_on_agents_with_admin_state_down = False
# =========== end of items for agent scheduler extension =====
# =========== items for l3 extension ==============

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_config import cfg
from oslo_log import log as logging
import oslo_messaging
@ -75,9 +76,11 @@ class DhcpAgentNotifyAPI(object):
return new_agents + existing_agents
def _get_enabled_agents(self, context, network, agents, method, payload):
"""Get the list of agents whose admin_state is UP."""
"""Get the list of agents who can provide services."""
network_id = network['id']
enabled_agents = [x for x in agents if x.admin_state_up]
enabled_agents = agents
if not cfg.CONF.enable_services_on_agents_with_admin_state_down:
enabled_agents = [x for x in agents if x.admin_state_up]
active_agents = [x for x in agents if x.is_active]
len_enabled_agents = len(enabled_agents)
len_active_agents = len(active_agents)

View File

@ -22,6 +22,7 @@ from neutron.common import constants
from neutron.common import rpc as n_rpc
from neutron.common import topics
from neutron.common import utils
from neutron.db import agentschedulers_db
from neutron.i18n import _LE
from neutron import manager
from neutron.plugins.common import constants as service_constants
@ -51,10 +52,11 @@ class L3AgentNotifyAPI(object):
adminContext = context if context.is_admin else context.elevated()
plugin = manager.NeutronManager.get_service_plugins().get(
service_constants.L3_ROUTER_NAT)
state = agentschedulers_db.get_admin_state_up_filter()
for router_id in router_ids:
l3_agents = plugin.get_l3_agents_hosting_routers(
adminContext, [router_id],
admin_state_up=True,
admin_state_up=state,
active=True)
if shuffle_agents:
random.shuffle(l3_agents)
@ -78,10 +80,11 @@ class L3AgentNotifyAPI(object):
context or context.elevated())
plugin = manager.NeutronManager.get_service_plugins().get(
service_constants.L3_ROUTER_NAT)
state = agentschedulers_db.get_admin_state_up_filter()
l3_agents = (plugin.
get_l3_agents_hosting_routers(adminContext,
[router_id],
admin_state_up=True,
admin_state_up=state,
active=True))
# TODO(murali): replace cast with fanout to avoid performance
# issues at greater scale.

View File

@ -19,6 +19,7 @@ from neutron.common import constants
from neutron.common import rpc as n_rpc
from neutron.common import topics
from neutron.common import utils
from neutron.db import agentschedulers_db
from neutron import manager
from neutron.plugins.common import constants as service_constants
@ -40,10 +41,11 @@ class MeteringAgentNotifyAPI(object):
service_constants.L3_ROUTER_NAT)
l3_routers = {}
state = agentschedulers_db.get_admin_state_up_filter()
for router in routers:
l3_agents = plugin.get_l3_agents_hosting_routers(
adminContext, [router['id']],
admin_state_up=True,
admin_state_up=state,
active=True)
for l3_agent in l3_agents:
LOG.debug('Notify metering agent at %(topic)s.%(host)s '

View File

@ -49,6 +49,15 @@ AGENTS_SCHEDULER_OPTS = [
'agents.')),
cfg.IntOpt('dhcp_agents_per_network', default=1,
help=_('Number of DHCP agents scheduled to host a network.')),
cfg.BoolOpt('enable_services_on_agents_with_admin_state_down',
default=False,
help=_('Enable services on an agent with admin_state_up '
'False. If this option is False, when admin_state_up '
'of an agent is turned False, services on it will be '
'disabled. Agents with admin_state_up False are not '
'selected for automatic scheduling regardless of this '
'option. But manual scheduling to such agents is '
'available if this option is True.')),
]
cfg.CONF.register_opts(AGENTS_SCHEDULER_OPTS)
@ -314,7 +323,7 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
with context.session.begin(subtransactions=True):
agent_db = self._get_agent(context, id)
if (agent_db['agent_type'] != constants.AGENT_TYPE_DHCP or
not agent_db['admin_state_up']):
not services_available(agent_db['admin_state_up'])):
raise dhcpagentscheduler.InvalidDHCPAgent(id=id)
dhcp_agents = self.get_dhcp_agents_hosting_networks(
context, [network_id])
@ -384,7 +393,7 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
LOG.debug("DHCP Agent not found on host %s", host)
return []
if not agent.admin_state_up:
if not services_available(agent.admin_state_up):
return []
query = context.session.query(NetworkDhcpAgentBinding.network_id)
query = query.filter(NetworkDhcpAgentBinding.dhcp_agent_id == agent.id)
@ -416,3 +425,19 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
def auto_schedule_networks(self, context, host):
    """Ask the configured network scheduler to bind unhosted networks
    to the DHCP agent running on ``host``.

    A no-op when no network scheduler is configured.
    """
    scheduler = self.network_scheduler
    if not scheduler:
        return
    scheduler.auto_schedule_networks(self, context, host)
# helper functions for readability.
def services_available(admin_state_up):
    """Tell whether an agent with the given admin_state_up can serve.

    When enable_services_on_agents_with_admin_state_down is set,
    services stay available on administratively disabled agents, so the
    admin state is irrelevant; otherwise availability simply follows
    ``admin_state_up``.
    """
    keep_services = cfg.CONF.enable_services_on_agents_with_admin_state_down
    return True if keep_services else admin_state_up
def get_admin_state_up_filter():
    """Return the admin_state_up value to filter agent queries with.

    ``None`` means "do not filter on admin_state_up at all", which is
    wanted when services on admin-down agents remain available;
    ``True`` restricts queries to administratively enabled agents.
    """
    keep_services = cfg.CONF.enable_services_on_agents_with_admin_state_down
    return None if keep_services else True

View File

@ -155,9 +155,10 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
is_wrong_type_or_unsuitable_agent = (
agent['agent_type'] != constants.AGENT_TYPE_L3 or
not agent['admin_state_up'] or
not self.get_l3_agent_candidates(context, router, [agent])
)
not agentschedulers_db.services_available(agent['admin_state_up'])
or
not self.get_l3_agent_candidates(context, router, [agent],
ignore_admin_state=True))
if is_wrong_type_or_unsuitable_agent:
raise l3agentscheduler.InvalidL3Agent(id=agent['id'])
@ -287,7 +288,7 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
self, context, host, router_ids):
agent = self._get_agent_by_type_and_host(
context, constants.AGENT_TYPE_L3, host)
if not agent.admin_state_up:
if not agentschedulers_db.services_available(agent.admin_state_up):
return []
query = context.session.query(RouterL3AgentBinding.router_id)
query = query.filter(
@ -429,11 +430,14 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
candidates.append(l3_agent)
return candidates
def get_l3_agent_candidates(self, context, sync_router, l3_agents):
def get_l3_agent_candidates(self, context, sync_router, l3_agents,
ignore_admin_state=False):
"""Get the valid l3 agents for the router from a list of l3_agents."""
candidates = []
for l3_agent in l3_agents:
if not l3_agent.admin_state_up:
if not ignore_admin_state and not l3_agent.admin_state_up:
# ignore_admin_state True comes from manual scheduling
# where admin_state_up judgement is already done.
continue
agent_conf = self.get_configuration_dict(l3_agent)
router_id = agent_conf.get('router_id', None)

View File

@ -21,6 +21,7 @@ from oslo_utils import timeutils
from neutron.api.rpc.agentnotifiers import dhcp_rpc_agent_api
from neutron.common import utils
from neutron.db import agents_db
from neutron.db.agentschedulers_db import cfg
from neutron.tests import base
@ -87,11 +88,14 @@ class TestDhcpAgentNotifyAPI(base.BaseTestCase):
self.assertEqual(expected_errors, self.mock_log.error.call_count)
def test__get_enabled_agents(self):
    """An admin-enabled, recently alive agent is returned as enabled."""
    # NOTE(review): this span mixes the removed (``agent``) and the added
    # (``agent1``/``agent2``) lines of a diff rendered flat; only one
    # variant should survive in the real file -- confirm against the
    # committed revision.
    agent = agents_db.Agent()
    agent.admin_state_up = True
    agent.heartbeat_timestamp = timeutils.utcnow()
    agent1 = agents_db.Agent()
    agent1.admin_state_up = True
    agent1.heartbeat_timestamp = timeutils.utcnow()
    agent2 = agents_db.Agent()
    # agent2 is administratively down and is not passed to the helper here;
    # the admin-down case is covered by a dedicated test.
    agent2.admin_state_up = False
    agent2.heartbeat_timestamp = timeutils.utcnow()
    network = {'id': 'foo_network_id'}
    self._test__get_enabled_agents(network, agents=[agent])
    self._test__get_enabled_agents(network, agents=[agent1])
def test__get_enabled_agents_with_inactive_ones(self):
agent1 = agents_db.Agent()
@ -111,6 +115,18 @@ class TestDhcpAgentNotifyAPI(base.BaseTestCase):
self._test__get_enabled_agents(network, [], port_count=20,
expected_warnings=0, expected_errors=1)
def test__get_enabled_agents_with_admin_state_down(self):
    """An admin-down agent stays enabled when the override option is on."""
    cfg.CONF.set_override(
        'enable_services_on_agents_with_admin_state_down', True)
    network = {'id': 'foo_network_id'}
    agents = []
    # One administratively enabled agent and one disabled agent, both
    # with a fresh heartbeat so they count as alive.
    for state in (True, False):
        agent = agents_db.Agent()
        agent.admin_state_up = state
        agent.heartbeat_timestamp = timeutils.utcnow()
        agents.append(agent)
    self._test__get_enabled_agents(network, agents=agents)
def test__notify_agents_fanout_required(self):
self.notifier._notify_agents(mock.ANY,
'network_delete_end',

View File

@ -584,11 +584,13 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
expected_code=exc.HTTPForbidden.code,
admin_context=False)
def test_network_add_to_dhcp_agent(self):
def _test_network_add_to_dhcp_agent(self, admin_state_up=True):
with self.network() as net1:
self._register_agent_states()
hosta_id = self._get_agent_id(constants.AGENT_TYPE_DHCP,
DHCP_HOSTA)
if not admin_state_up:
self._set_agent_admin_state_up(DHCP_HOSTA, False)
num_before_add = len(
self._list_networks_hosted_by_dhcp_agent(
hosta_id)['networks'])
@ -600,6 +602,14 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
self.assertEqual(0, num_before_add)
self.assertEqual(1, num_after_add)
def test_network_add_to_dhcp_agent(self):
    """Manually adding a network to an admin-up DHCP agent succeeds."""
    self._test_network_add_to_dhcp_agent()
def test_network_add_to_dhcp_agent_with_admin_state_down(self):
    """Manual scheduling to an admin-down DHCP agent succeeds when
    enable_services_on_agents_with_admin_state_down is True.
    """
    cfg.CONF.set_override(
        'enable_services_on_agents_with_admin_state_down', True)
    self._test_network_add_to_dhcp_agent(admin_state_up=False)
def test_network_remove_from_dhcp_agent(self):
dhcp_hosta = {
'binary': 'neutron-dhcp-agent',
@ -654,6 +664,39 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
self.assertEqual(port_list['ports'][0]['device_id'],
constants.DEVICE_ID_RESERVED_DHCP_PORT)
def _test_get_active_networks_from_admin_state_down_agent(self,
                                                          keep_services):
    """Check get_active_networks against an admin-down DHCP agent.

    The agent keeps its networks when
    enable_services_on_agents_with_admin_state_down is enabled and
    loses them otherwise.
    """
    if keep_services:
        cfg.CONF.set_override(
            'enable_services_on_agents_with_admin_state_down', True)
    agent_state = {
        'binary': 'neutron-dhcp-agent',
        'host': DHCP_HOSTA,
        'topic': 'DHCP_AGENT',
        'configurations': {'dhcp_driver': 'dhcp_driver',
                           'use_namespaces': True,
                           },
        'agent_type': constants.AGENT_TYPE_DHCP}
    self._register_one_agent_state(agent_state)
    callback = dhcp_rpc.DhcpRpcCallback()
    with self.port():
        active = callback.get_active_networks(self.adminContext,
                                              host=DHCP_HOSTA)
        self.assertEqual(1, len(active))
        # Take the agent administratively down and query again.
        self._set_agent_admin_state_up(DHCP_HOSTA, False)
        active = callback.get_active_networks(self.adminContext,
                                              host=DHCP_HOSTA)
        expected = 1 if keep_services else 0
        self.assertEqual(expected, len(active))
def test_dhcp_agent_keep_services_off(self):
    """Default behavior: an admin-down DHCP agent serves no networks."""
    self._test_get_active_networks_from_admin_state_down_agent(False)
def test_dhcp_agent_keep_services_on(self):
    """With the override option, an admin-down agent keeps its networks."""
    self._test_get_active_networks_from_admin_state_down_agent(True)
def _take_down_agent_and_run_reschedule(self, host):
# take down the agent on host A and ensure B is alive
self.adminContext.session.begin(subtransactions=True)
@ -1106,11 +1149,13 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
None)
self._delete('routers', router['router']['id'])
def test_router_add_to_l3_agent(self):
def _test_router_add_to_l3_agent(self, admin_state_up=True):
with self.router() as router1:
self._register_agent_states()
hosta_id = self._get_agent_id(constants.AGENT_TYPE_L3,
L3_HOSTA)
if not admin_state_up:
self._set_agent_admin_state_up(L3_HOSTA, False)
num_before_add = len(
self._list_routers_hosted_by_l3_agent(
hosta_id)['routers'])
@ -1127,6 +1172,14 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
self.assertEqual(0, num_before_add)
self.assertEqual(1, num_after_add)
def test_router_add_to_l3_agent(self):
    """Manually adding a router to an admin-up L3 agent succeeds."""
    self._test_router_add_to_l3_agent()
def test_router_add_to_l3_agent_with_admin_state_down(self):
    """Manual scheduling to an admin-down L3 agent succeeds when
    enable_services_on_agents_with_admin_state_down is True.
    """
    cfg.CONF.set_override(
        'enable_services_on_agents_with_admin_state_down', True)
    self._test_router_add_to_l3_agent(admin_state_up=False)
def test_router_add_to_l3_agent_two_times(self):
with self.router() as router1:
self._register_agent_states()
@ -1174,6 +1227,31 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
expected_code=exc.HTTPForbidden.code,
admin_context=False)
def _test_sync_routers_from_admin_state_down_agent(self, keep_services):
    """Check sync_routers against an admin-down L3 agent.

    The agent keeps its routers when
    enable_services_on_agents_with_admin_state_down is enabled and
    loses them otherwise.
    """
    if keep_services:
        cfg.CONF.set_override(
            'enable_services_on_agents_with_admin_state_down', True)
    callback = l3_rpc.L3RpcCallback()
    self._register_agent_states()
    agent_id = self._get_agent_id(constants.AGENT_TYPE_L3, L3_HOSTA)
    with self.router() as router:
        self._add_router_to_l3_agent(agent_id,
                                     router['router']['id'])
        synced = callback.sync_routers(self.adminContext, host=L3_HOSTA)
        self.assertEqual(1, len(synced))
        # Turn the agent administratively down and sync again.
        self._set_agent_admin_state_up(L3_HOSTA, False)
        synced = callback.sync_routers(self.adminContext, host=L3_HOSTA)
        expected = 1 if keep_services else 0
        self.assertEqual(expected, len(synced))
def test_l3_agent_keep_services_off(self):
    """Default behavior: an admin-down L3 agent syncs no routers."""
    self._test_sync_routers_from_admin_state_down_agent(False)
def test_l3_agent_keep_services_on(self):
    """With the override option, an admin-down L3 agent keeps its routers."""
    self._test_sync_routers_from_admin_state_down_agent(True)
def test_list_routers_hosted_by_l3_agent_with_invalid_agent(self):
invalid_agentid = 'non_existing_agent'
self._list_routers_hosted_by_l3_agent(invalid_agentid,