Unify logic that determines liveness of DHCP agent
For DHCP agents it is sometimes not enough to check the agent's last heartbeat time, because during its startup period the agent may fail to send state reports while it is busy processing networks. In the rescheduling logic such a DHCP agent is given additional time after start. The additional time is proportional to the number of networks the agent is hosting. We need to apply the same logic to the DHCP agent scheduler to avoid the case where a starting agent is considered dead and a network ends up with more hosting agents than configured. Change-Id: I0fe6244c7d2ed42e4744351be34f251318322c54 Closes-Bug: #1417708
This commit is contained in:
parent
b672a19c35
commit
28fe7e85cc
|
@ -156,7 +156,12 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
|
|||
|
||||
self.setup_agent_status_check(self.remove_networks_from_down_agents)
|
||||
|
||||
def _agent_starting_up(self, context, agent):
|
||||
def is_eligible_agent(self, context, active, agent):
    """Return whether the agent may be scheduled to.

    A DHCP agent is eligible when the generic scheduler check passes
    (the agent is active) OR the agent is still in its 'starting up'
    period, during which missed heartbeats are forgiven.
    """
    # Evaluate the cheap, context-free base check first; short-circuit
    # keeps the DB-touching startup check from running when possible.
    base_eligibility = AgentSchedulerDbMixin.is_eligible_agent(active, agent)
    return base_eligibility or self.agent_starting_up(context, agent)
|
||||
|
||||
def agent_starting_up(self, context, agent):
|
||||
"""Check if agent was just started.
|
||||
|
||||
Method returns True if agent is in its 'starting up' period.
|
||||
|
@ -217,7 +222,7 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
|
|||
for binding in bindings:
|
||||
agent_id = binding.dhcp_agent['id']
|
||||
if agent_id not in checked_agents:
|
||||
if self._agent_starting_up(context, binding.dhcp_agent):
|
||||
if self.agent_starting_up(context, binding.dhcp_agent):
|
||||
# When agent starts and it has many networks to process
|
||||
# it may fail to send state reports in defined interval.
|
||||
# The server will consider it dead and try to remove
|
||||
|
@ -287,8 +292,8 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
|
|||
|
||||
return [binding.dhcp_agent
|
||||
for binding in query
|
||||
if AgentSchedulerDbMixin.is_eligible_agent(active,
|
||||
binding.dhcp_agent)]
|
||||
if self.is_eligible_agent(context, active,
|
||||
binding.dhcp_agent)]
|
||||
|
||||
def add_network_to_dhcp_agent(self, context, id, network_id):
|
||||
self._get_network(context, network_id)
|
||||
|
|
|
@ -81,9 +81,8 @@ class ChanceScheduler(object):
|
|||
return
|
||||
active_dhcp_agents = [
|
||||
agent for agent in set(enabled_dhcp_agents)
|
||||
if not agents_db.AgentDbMixin.is_agent_down(
|
||||
agent['heartbeat_timestamp'])
|
||||
and agent not in dhcp_agents
|
||||
if agent not in dhcp_agents and plugin.is_eligible_agent(
|
||||
context, True, agent)
|
||||
]
|
||||
if not active_dhcp_agents:
|
||||
LOG.warn(_LW('No more DHCP agents'))
|
||||
|
|
|
@ -32,6 +32,7 @@ from neutron.api.v2 import attributes
|
|||
from neutron.common import constants
|
||||
from neutron import context
|
||||
from neutron.db import agents_db
|
||||
from neutron.db import agentschedulers_db
|
||||
from neutron.db import l3_agentschedulers_db
|
||||
from neutron.extensions import agent
|
||||
from neutron.extensions import dhcpagentscheduler
|
||||
|
@ -478,6 +479,27 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
|
|||
self._delete('ports', port2['port']['id'])
|
||||
self.assertEqual(0, len(dhcp_agents['agents']))
|
||||
|
||||
def test_is_eligible_agent(self):
    """An agent is eligible iff the base check passes OR it is starting up."""
    startup_path = ('neutron.db.agentschedulers_db.'
                    'DhcpAgentSchedulerDbMixin.agent_starting_up')
    eligible_path = ('neutron.db.agentschedulers_db.'
                     'AgentSchedulerDbMixin.is_eligible_agent')
    mixin = agentschedulers_db.DhcpAgentSchedulerDbMixin()
    with contextlib.nested(
        mock.patch(startup_path),
        mock.patch(eligible_path)
    ) as (startup_mock, base_mock):
        # Exercise the full truth table of (starting_up, base_eligible).
        for starting_up, base_eligible in ((True, True),
                                           (True, False),
                                           (False, True),
                                           (False, False)):
            startup_mock.return_value = starting_up
            base_mock.return_value = base_eligible
            self.assertEqual(starting_up or base_eligible,
                             mixin.is_eligible_agent(None, None, None))
|
||||
|
||||
def test_network_scheduler_with_down_agent(self):
|
||||
dhcp_hosta = {
|
||||
'binary': 'neutron-dhcp-agent',
|
||||
|
@ -488,17 +510,19 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
|
|||
},
|
||||
'agent_type': constants.AGENT_TYPE_DHCP}
|
||||
self._register_one_agent_state(dhcp_hosta)
|
||||
is_agent_down_str = 'neutron.db.agents_db.AgentDbMixin.is_agent_down'
|
||||
with mock.patch(is_agent_down_str) as mock_is_agent_down:
|
||||
mock_is_agent_down.return_value = False
|
||||
eligible_agent_str = ('neutron.db.agentschedulers_db.'
|
||||
'DhcpAgentSchedulerDbMixin.is_eligible_agent')
|
||||
with mock.patch(eligible_agent_str) as eligible_agent:
|
||||
eligible_agent.return_value = True
|
||||
with self.port() as port:
|
||||
dhcp_agents = self._list_dhcp_agents_hosting_network(
|
||||
port['port']['network_id'])
|
||||
self._delete('ports', port['port']['id'])
|
||||
self._delete('networks', port['port']['network_id'])
|
||||
self.assertEqual(1, len(dhcp_agents['agents']))
|
||||
with mock.patch(is_agent_down_str) as mock_is_agent_down:
|
||||
mock_is_agent_down.return_value = True
|
||||
|
||||
with mock.patch(eligible_agent_str) as eligible_agent:
|
||||
eligible_agent.return_value = False
|
||||
with self.port() as port:
|
||||
dhcp_agents = self._list_dhcp_agents_hosting_network(
|
||||
port['port']['network_id'])
|
||||
|
|
|
@ -243,7 +243,7 @@ class TestNetworksFailover(TestDhcpSchedulerBaseTestCase,
|
|||
dhcp_agent={'id': 'id2'}),
|
||||
sched_db.NetworkDhcpAgentBinding(network_id='foo4',
|
||||
dhcp_agent={'id': 'id2'})]
|
||||
with mock.patch.object(self, '_agent_starting_up',
|
||||
with mock.patch.object(self, 'agent_starting_up',
|
||||
side_effect=[True, False]):
|
||||
res = [b for b in self._filter_bindings(None, bindings)]
|
||||
# once per each agent id1 and id2
|
||||
|
|
Loading…
Reference in New Issue