Unify logic that determines liveliness of DHCP agent

For DHCP agents sometimes it's not enough to check agent's last heartbeat
time because in its starting period the agent may fail to send state reports
because it's busy processing networks.
In rescheduling logic such DHCP agent is given additional time after start.
Additional time is proportional to amount of networks the agent is hosting.
Need to apply the same logic to DHCP agent scheduler to avoid a case
when starting agent is considered dead and a network gets more hosting
agents than configured.

Change-Id: I0fe6244c7d2ed42e4744351be34f251318322c54
Closes-Bug: #1417708
This commit is contained in:
Eugene Nikanorov 2015-02-04 15:05:36 +03:00
parent b672a19c35
commit 28fe7e85cc
4 changed files with 41 additions and 13 deletions

View File

@ -156,7 +156,12 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
self.setup_agent_status_check(self.remove_networks_from_down_agents)
def _agent_starting_up(self, context, agent):
def is_eligible_agent(self, context, active, agent):
# eligible agent is active or starting up
return (AgentSchedulerDbMixin.is_eligible_agent(active, agent) or
self.agent_starting_up(context, agent))
def agent_starting_up(self, context, agent):
"""Check if agent was just started.
Method returns True if agent is in its 'starting up' period.
@ -217,7 +222,7 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
for binding in bindings:
agent_id = binding.dhcp_agent['id']
if agent_id not in checked_agents:
if self._agent_starting_up(context, binding.dhcp_agent):
if self.agent_starting_up(context, binding.dhcp_agent):
# When agent starts and it has many networks to process
# it may fail to send state reports in defined interval.
# The server will consider it dead and try to remove
@ -287,8 +292,8 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
return [binding.dhcp_agent
for binding in query
if AgentSchedulerDbMixin.is_eligible_agent(active,
binding.dhcp_agent)]
if self.is_eligible_agent(context, active,
binding.dhcp_agent)]
def add_network_to_dhcp_agent(self, context, id, network_id):
self._get_network(context, network_id)

View File

@ -81,9 +81,8 @@ class ChanceScheduler(object):
return
active_dhcp_agents = [
agent for agent in set(enabled_dhcp_agents)
if not agents_db.AgentDbMixin.is_agent_down(
agent['heartbeat_timestamp'])
and agent not in dhcp_agents
if agent not in dhcp_agents and plugin.is_eligible_agent(
context, True, agent)
]
if not active_dhcp_agents:
LOG.warn(_LW('No more DHCP agents'))

View File

@ -32,6 +32,7 @@ from neutron.api.v2 import attributes
from neutron.common import constants
from neutron import context
from neutron.db import agents_db
from neutron.db import agentschedulers_db
from neutron.db import l3_agentschedulers_db
from neutron.extensions import agent
from neutron.extensions import dhcpagentscheduler
@ -478,6 +479,27 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
self._delete('ports', port2['port']['id'])
self.assertEqual(0, len(dhcp_agents['agents']))
def test_is_eligible_agent(self):
agent_startup = ('neutron.db.agentschedulers_db.'
'DhcpAgentSchedulerDbMixin.agent_starting_up')
is_eligible_agent = ('neutron.db.agentschedulers_db.'
'AgentSchedulerDbMixin.is_eligible_agent')
dhcp_mixin = agentschedulers_db.DhcpAgentSchedulerDbMixin()
with contextlib.nested(
mock.patch(agent_startup),
mock.patch(is_eligible_agent)
) as (startup, elig):
tests = [(True, True),
(True, False),
(False, True),
(False, False)]
for rv1, rv2 in tests:
startup.return_value = rv1
elig.return_value = rv2
self.assertEqual(rv1 or rv2,
dhcp_mixin.is_eligible_agent(None,
None, None))
def test_network_scheduler_with_down_agent(self):
dhcp_hosta = {
'binary': 'neutron-dhcp-agent',
@ -488,17 +510,19 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
},
'agent_type': constants.AGENT_TYPE_DHCP}
self._register_one_agent_state(dhcp_hosta)
is_agent_down_str = 'neutron.db.agents_db.AgentDbMixin.is_agent_down'
with mock.patch(is_agent_down_str) as mock_is_agent_down:
mock_is_agent_down.return_value = False
eligible_agent_str = ('neutron.db.agentschedulers_db.'
'DhcpAgentSchedulerDbMixin.is_eligible_agent')
with mock.patch(eligible_agent_str) as eligible_agent:
eligible_agent.return_value = True
with self.port() as port:
dhcp_agents = self._list_dhcp_agents_hosting_network(
port['port']['network_id'])
self._delete('ports', port['port']['id'])
self._delete('networks', port['port']['network_id'])
self.assertEqual(1, len(dhcp_agents['agents']))
with mock.patch(is_agent_down_str) as mock_is_agent_down:
mock_is_agent_down.return_value = True
with mock.patch(eligible_agent_str) as eligible_agent:
eligible_agent.return_value = False
with self.port() as port:
dhcp_agents = self._list_dhcp_agents_hosting_network(
port['port']['network_id'])

View File

@ -243,7 +243,7 @@ class TestNetworksFailover(TestDhcpSchedulerBaseTestCase,
dhcp_agent={'id': 'id2'}),
sched_db.NetworkDhcpAgentBinding(network_id='foo4',
dhcp_agent={'id': 'id2'})]
with mock.patch.object(self, '_agent_starting_up',
with mock.patch.object(self, 'agent_starting_up',
side_effect=[True, False]):
res = [b for b in self._filter_bindings(None, bindings)]
# once per each agent id1 and id2