Auto reschedule loadbalancers from dead agents
Similarly to what was done for 'allow_automatic_l3agent_failover', this patch adds a periodic check that examines the status of all LBaaS agents with loadbalancers scheduled to them. If an agent is dead, its loadbalancers will be rescheduled to an alive agent. The periodic LBaaS agent status check is invoked by add_agent_status_check_worker(), which was introduced in the neutron codebase in I3a32a95489831f0d862930384309eefdc881d8f6 to allow safe process forking. Closes-Bug: #1565511 Depends-On: I652ab029b7427c8783e4b2f0443a89ee884bf064 Change-Id: Id8d3218bf1e52722cc10ddcd34e3e734eef90658
This commit is contained in:
parent
375201eddd
commit
6ef87fe033
|
@ -97,6 +97,21 @@ class LbaasAgentSchedulerDbMixin(agentschedulers_db.AgentSchedulerDbMixin,
|
|||
candidates.append(agent)
|
||||
return candidates
|
||||
|
||||
def get_down_loadbalancer_bindings(self, context, agent_dead_limit):
    """Build the query selecting bindings whose agent stopped reporting.

    A binding matches when the agent it is joined to has a
    ``heartbeat_timestamp`` older than the cutoff computed from
    ``agent_dead_limit`` and its ``admin_state_up`` flag is set.

    :param context: request context providing the DB session.
    :param agent_dead_limit: value handed to ``get_cutoff_time`` to
        compute the heartbeat cutoff.
    :returns: the filtered query over ``LoadbalancerAgentBinding``.
    """
    heartbeat_cutoff = self.get_cutoff_time(agent_dead_limit)
    bindings = context.session.query(LoadbalancerAgentBinding)
    bindings = bindings.join(agents_db.Agent)
    return bindings.filter(
        agents_db.Agent.heartbeat_timestamp < heartbeat_cutoff,
        agents_db.Agent.admin_state_up)
|
||||
|
||||
def _unschedule_loadbalancer(self, context, loadbalancer_id, agent_id):
    """Delete the binding that ties a loadbalancer to an agent.

    :param context: request context providing the DB session.
    :param loadbalancer_id: ID of the bound loadbalancer.
    :param agent_id: ID of the agent the loadbalancer is bound to.
    """
    with context.session.begin(subtransactions=True):
        # Only rows matching both IDs are removed.
        context.session.query(LoadbalancerAgentBinding).filter(
            LoadbalancerAgentBinding.loadbalancer_id == loadbalancer_id,
            LoadbalancerAgentBinding.agent_id == agent_id).delete()
|
||||
|
||||
|
||||
class ChanceScheduler(object):
|
||||
"""Allocate a loadbalancer agent for a vip in a random way."""
|
||||
|
|
|
@ -49,3 +49,8 @@ class MisMatchedKey(TLSException):
|
|||
|
||||
class CertificateStorageException(TLSException):
    """TLS error reporting that a certificate could not be stored."""
    # The template expects a ``msg`` value describing the failure.
    message = _LE('Could not store certificate: %(msg)s')
|
||||
|
||||
|
||||
class LoadbalancerReschedulingFailed(exceptions.Conflict):
    """Conflict error: rescheduling found no eligible LBaaS agent."""
    # The template expects a ``loadbalancer_id`` value.
    message = _LE("Failed rescheduling loadbalancer %(loadbalancer_id)s: "
                  "no eligible lbaas agent found.")
|
||||
|
|
|
@ -14,19 +14,25 @@
|
|||
|
||||
from neutron.common import rpc as n_rpc
|
||||
from neutron.db import agents_db
|
||||
from neutron.db import common_db_mixin
|
||||
from neutron.services import provider_configuration as provconf
|
||||
from neutron_lib import exceptions as n_exc
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
import oslo_messaging as messaging
|
||||
from oslo_utils import importutils
|
||||
|
||||
from neutron_lbaas._i18n import _
|
||||
from neutron_lbaas import agent_scheduler as agent_scheduler_v2
|
||||
from neutron_lbaas.common import exceptions
|
||||
from neutron_lbaas.db.loadbalancer import loadbalancer_dbv2 as ldbv2
|
||||
from neutron_lbaas.drivers.common import agent_callbacks
|
||||
from neutron_lbaas.drivers import driver_base
|
||||
from neutron_lbaas.extensions import lbaas_agentschedulerv2
|
||||
from neutron_lbaas.services.loadbalancer import constants as lb_const
|
||||
from neutron_lbaas.services.loadbalancer import data_models
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
LB_SCHEDULERS = 'loadbalancer_schedulers'
|
||||
|
||||
|
@ -35,6 +41,11 @@ AGENT_SCHEDULER_OPTS = [
|
|||
default='neutron_lbaas.agent_scheduler.ChanceScheduler',
|
||||
help=_('Driver to use for scheduling '
|
||||
'to a default loadbalancer agent')),
|
||||
cfg.BoolOpt('allow_automatic_lbaas_agent_failover',
|
||||
default=False,
|
||||
help=_('Automatically reschedule loadbalancer from offline '
|
||||
'to online lbaas agents. This is only supported for '
|
||||
'drivers who use the neutron LBaaSv2 agent')),
|
||||
]
|
||||
|
||||
cfg.CONF.register_opts(AGENT_SCHEDULER_OPTS)
|
||||
|
@ -144,7 +155,46 @@ class LoadBalancerAgentApi(object):
|
|||
healthmonitor=healthmonitor)
|
||||
|
||||
|
||||
class LoadBalancerManager(driver_base.BaseLoadBalancerManager):
|
||||
class LoadBalancerManager(driver_base.BaseLoadBalancerManager,
|
||||
agent_scheduler_v2.LbaasAgentSchedulerDbMixin,
|
||||
common_db_mixin.CommonDbMixin):
|
||||
def __init__(self, driver):
    """Keep a reference to the driver and create a DB accessor.

    :param driver: the driver object this manager belongs to.
    """
    self.driver = driver
    # LBaaS v2 DB plugin instance; used by this manager to look up
    # loadbalancers.
    self.db = ldbv2.LoadBalancerPluginDbv2()
|
||||
|
||||
def reschedule_lbaas_from_down_agents(self):
    """Move loadbalancers away from LBaaS v2 agents that are down.

    Delegates to ``reschedule_resources_from_down_agents``, supplying the
    loadbalancer-specific binding query, binding attribute names,
    reschedule callback and failure exception.
    """
    reschedule_kwargs = {
        'agent_type': lb_const.AGENT_TYPE_LOADBALANCERV2,
        'get_down_bindings': self.get_down_loadbalancer_bindings,
        'agent_id_attr': 'agent_id',
        'resource_id_attr': 'loadbalancer_id',
        'resource_name': 'loadbalancer',
        'reschedule_resource': self.reschedule_loadbalancer,
        'rescheduling_failed': exceptions.LoadbalancerReschedulingFailed,
    }
    self.reschedule_resources_from_down_agents(**reschedule_kwargs)
|
||||
|
||||
def reschedule_loadbalancer(self, context, loadbalancer_id):
    """Reschedule a loadbalancer to a new lbaas agent.

    Remove the loadbalancer from the agent currently hosting it (if any)
    and schedule it again.

    :param context: request context providing the DB session.
    :param loadbalancer_id: ID of the loadbalancer to move.
    :raises LoadbalancerReschedulingFailed: if no agent hosts the
        loadbalancer after it has been scheduled again.
    """
    cur_agent = self.get_agent_hosting_loadbalancer(context,
                                                    loadbalancer_id)
    # Unbinding and re-scheduling happen inside one (sub)transaction.
    with context.session.begin(subtransactions=True):
        # The hosting-agent lookup may come back empty (the second lookup
        # below is checked for exactly that). In that case there is
        # nothing to unschedule, so go straight to scheduling instead of
        # failing with a TypeError on ``cur_agent['agent']``.
        if cur_agent:
            self._unschedule_loadbalancer(context, loadbalancer_id,
                                          cur_agent['agent']['id'])
        self._schedule_loadbalancer(context, loadbalancer_id)
        new_agent = self.get_agent_hosting_loadbalancer(context,
                                                        loadbalancer_id)
        if not new_agent:
            raise exceptions.LoadbalancerReschedulingFailed(
                loadbalancer_id=loadbalancer_id)
|
||||
|
||||
def _schedule_loadbalancer(self, context, loadbalancer_id):
    """Load the loadbalancer from the DB and pass it to ``create``.

    :param context: request context used for the lookup and the create.
    :param loadbalancer_id: ID of the loadbalancer to schedule.
    """
    self.create(context,
                self.db.get_loadbalancer(context, loadbalancer_id))
|
||||
|
||||
def update(self, context, old_loadbalancer, loadbalancer):
|
||||
super(LoadBalancerManager, self).update(context, old_loadbalancer,
|
||||
|
@ -334,6 +384,13 @@ class AgentDriverBase(driver_base.LoadBalancerBaseDriver):
|
|||
self.loadbalancer_scheduler = importutils.import_object(
|
||||
lb_sched_driver)
|
||||
|
||||
def get_periodic_jobs(self):
    """Return the list of callables this driver wants run periodically.

    The list holds the down-agent rescheduling job only when the
    ``allow_automatic_lbaas_agent_failover`` option is enabled; otherwise
    it is empty.
    """
    if not cfg.CONF.allow_automatic_lbaas_agent_failover:
        return []
    return [self.load_balancer.reschedule_lbaas_from_down_agents]
|
||||
|
||||
def start_rpc_listeners(self):
|
||||
# other agent based plugin driver might already set callbacks on plugin
|
||||
if hasattr(self.plugin, 'agent_callbacks'):
|
||||
|
|
|
@ -21,9 +21,11 @@ from neutron_lib.plugins import directory
|
|||
from neutron.api.v2 import attributes as attrs
|
||||
from neutron.api.v2 import base as napi_base
|
||||
from neutron import context as ncontext
|
||||
from neutron.db import agentschedulers_db
|
||||
from neutron.db import servicetype_db as st_db
|
||||
from neutron.extensions import flavors
|
||||
from neutron.plugins.common import constants
|
||||
from neutron import service
|
||||
from neutron.services.flavors import flavors_plugin
|
||||
from neutron.services import provider_configuration as pconf
|
||||
from neutron.services import service_base
|
||||
|
@ -55,7 +57,8 @@ def add_provider_configuration(type_manager, service_type):
|
|||
pconf.ProviderConfiguration('neutron_lbaas'))
|
||||
|
||||
|
||||
class LoadBalancerPluginv2(loadbalancerv2.LoadBalancerPluginBaseV2):
|
||||
class LoadBalancerPluginv2(loadbalancerv2.LoadBalancerPluginBaseV2,
|
||||
agentschedulers_db.AgentSchedulerDbMixin):
|
||||
"""Implementation of the Neutron Loadbalancer Service Plugin.
|
||||
|
||||
This class manages the workflow of LBaaS request/response.
|
||||
|
@ -82,8 +85,17 @@ class LoadBalancerPluginv2(loadbalancerv2.LoadBalancerPluginBaseV2):
|
|||
add_provider_configuration(
|
||||
self.service_type_manager, constants.LOADBALANCERV2)
|
||||
self._load_drivers()
|
||||
self.start_periodic_jobs()
|
||||
self.start_rpc_listeners()
|
||||
self.db.subscribe()
|
||||
rpc_worker = service.RpcWorker([self], worker_process_count=0)
|
||||
self.add_worker(rpc_worker)
|
||||
|
||||
def start_periodic_jobs(self):
    """Register each driver-provided periodic job as a status-check worker.

    Drivers that do not expose ``get_periodic_jobs`` are skipped.
    """
    for driver in self.drivers.values():
        if not hasattr(driver, 'get_periodic_jobs'):
            continue
        for periodic_job in driver.get_periodic_jobs():
            self.add_agent_status_check_worker(periodic_job)
|
||||
|
||||
def start_rpc_listeners(self):
|
||||
listeners = []
|
||||
|
|
|
@ -14,16 +14,21 @@
|
|||
|
||||
import mock
|
||||
from neutron_lib.plugins import directory
|
||||
from oslo_utils import importutils
|
||||
|
||||
from neutron import context
|
||||
from neutron.db import servicetype_db as st_db
|
||||
from neutron.plugins.common import constants
|
||||
from neutron.tests.common import helpers
|
||||
|
||||
from neutron_lbaas.common import exceptions
|
||||
from neutron_lbaas.db.loadbalancer import models
|
||||
from neutron_lbaas.drivers.common import agent_driver_base
|
||||
from neutron_lbaas.extensions import loadbalancerv2
|
||||
from neutron_lbaas.services.loadbalancer import constants as lb_const
|
||||
from neutron_lbaas.tests import base
|
||||
from neutron_lbaas.tests.unit.db.loadbalancer import test_db_loadbalancerv2
|
||||
from neutron_lbaas.tests.unit import test_agent_scheduler
|
||||
|
||||
|
||||
class TestLoadBalancerPluginBase(test_db_loadbalancerv2.LbaasPluginDbTestCase):
|
||||
|
@ -578,3 +583,145 @@ class TestLoadBalancerPluginNotificationWrapper(TestLoadBalancerPluginBase):
|
|||
loadbalancerv2.EntityNotFound,
|
||||
self.plugin_instance.db.get_healthmonitor,
|
||||
ctx, hm_id)
|
||||
|
||||
|
||||
class TestLoadBalancerManager(test_agent_scheduler.
|
||||
LBaaSAgentSchedulerTestCase):
|
||||
|
||||
def setUp(self):
    """Create the manager under test, using this test case as its driver."""
    super(TestLoadBalancerManager, self).setUp()
    # The test case itself is handed to the manager as ``driver``.
    # NOTE(review): the attributes assigned below presumably stand in for
    # what a real agent driver exposes -- confirm against AgentDriverBase.
    self.load_balancer = agent_driver_base.LoadBalancerManager(self)
    self.agent_rpc = agent_driver_base.LoadBalancerAgentApi(
        lb_const.LOADBALANCER_AGENTV2)
    self.plugin = self.lbaas_plugin
    self.device_driver = 'haproxy_ns'
    self.loadbalancer_scheduler = importutils.import_object(
        'neutron_lbaas.agent_scheduler.ChanceScheduler')
|
||||
|
||||
def test_reschedule_lbaas_from_down_agents(self):
    """The manager delegates to the generic helper with LBaaS arguments."""
    with mock.patch(
        'neutron_lbaas.drivers.common.agent_driver_base.'
        'LoadBalancerManager.reschedule_resources_from_down_agents'
    ) as generic_reschedule:
        self.load_balancer.reschedule_lbaas_from_down_agents()
        self.assertTrue(generic_reschedule.called)
        manager = self.load_balancer
        expected_kwargs = {
            'agent_type': lb_const.AGENT_TYPE_LOADBALANCERV2,
            'get_down_bindings': manager.get_down_loadbalancer_bindings,
            'agent_id_attr': 'agent_id',
            'resource_id_attr': 'loadbalancer_id',
            'resource_name': 'loadbalancer',
            'reschedule_resource': manager.reschedule_loadbalancer,
            'rescheduling_failed':
                exceptions.LoadbalancerReschedulingFailed,
        }
        generic_reschedule.assert_called_once_with(**expected_kwargs)
|
||||
|
||||
def test_loadbalancer_reschedule_from_dead_lbaas_agent(self):
    """Killing the hosting agent makes the loadbalancer change agents."""
    self._register_agent_states(lbaas_agents=True)
    with self.loadbalancer() as created:
        lb = created['loadbalancer']
        self.plugin.db.update_loadbalancer_provisioning_status(
            self.adminContext, lb['id'])
        agent_before = self._get_lbaas_agent_hosting_loadbalancer(
            lb['id'])
        self.assertIsNotNone(agent_before)
        # Make the hosting agent look dead, then run the periodic job.
        helpers.kill_agent(agent_before['agent']['id'])
        self.load_balancer.reschedule_lbaas_from_down_agents()
        agent_after = self._get_lbaas_agent_hosting_loadbalancer(
            lb['id'])
        self.assertNotEqual(agent_before, agent_after)
|
||||
|
||||
def test_reschedule_loadbalancer_succeeded(self):
    """reschedule_loadbalancer unschedules then schedules, without error."""
    self._register_agent_states(lbaas_agents=True)
    with self.loadbalancer() as loadbalancer:
        loadbalancer_data = loadbalancer['loadbalancer']
        self.plugin.db.update_loadbalancer_provisioning_status(
            self.adminContext, loadbalancer_data['id'])
        # Look up the real hosting agent before the lookup gets patched.
        hosting_agent = self.load_balancer.get_agent_hosting_loadbalancer(
            self.adminContext, loadbalancer_data['id'])
        # The lookup is patched to return the same agent for both calls
        # made by reschedule_loadbalancer(). The other two patches record
        # calls while forwarding to the real bound methods, which are
        # captured before their own patch becomes active.
        with mock.patch(
            'neutron_lbaas.drivers.common.agent_driver_base.'
            'LoadBalancerManager.get_agent_hosting_loadbalancer',
            side_effect=(hosting_agent, hosting_agent)
        ) as mock_get_agent_hosting_lb, mock.patch(
            'neutron_lbaas.drivers.common.agent_driver_base.'
            'LoadBalancerManager._unschedule_loadbalancer',
            side_effect=self.load_balancer._unschedule_loadbalancer
        ) as mock_unschedule_lb, mock.patch(
            'neutron_lbaas.drivers.common.agent_driver_base.'
            'LoadBalancerManager._schedule_loadbalancer',
            side_effect=self.load_balancer._schedule_loadbalancer
        ) as mock_schedule_lb:
            # rescheduling is expected to succeed
            self.load_balancer.reschedule_loadbalancer(
                self.adminContext, loadbalancer_data['id'])
            # check the usage of get_agent_hosting_loadbalancer()
            self.assertTrue(mock_get_agent_hosting_lb.called)
            mock_get_agent_hosting_lb.assert_called_with(
                self.adminContext, loadbalancer_data['id'])
            # check the usage of _unschedule_loadbalancer()
            self.assertTrue(mock_unschedule_lb.called)
            mock_unschedule_lb.assert_called_once_with(
                self.adminContext, loadbalancer_data['id'],
                hosting_agent['agent']['id'])
            # check the usage of _schedule_loadbalancer()
            self.assertTrue(mock_schedule_lb.called)
            mock_schedule_lb.assert_called_once_with(
                self.adminContext, loadbalancer_data['id'])
|
||||
|
||||
def test_reschedule_loadbalancer_failed(self):
    """reschedule_loadbalancer raises when no agent hosts the LB afterwards."""
    self._register_agent_states(lbaas_agents=True)
    with self.loadbalancer() as loadbalancer:
        loadbalancer_data = loadbalancer['loadbalancer']
        self.plugin.db.update_loadbalancer_provisioning_status(
            self.adminContext, loadbalancer_data['id'])
        # Look up the real hosting agent before the lookup gets patched.
        hosting_agent = self.load_balancer.get_agent_hosting_loadbalancer(
            self.adminContext, loadbalancer_data['id'])
        # The patched lookup returns the agent on the first call and None
        # on the second, i.e. no agent is found after re-scheduling. The
        # other two patches record calls while forwarding to the real
        # bound methods.
        with mock.patch(
            'neutron_lbaas.drivers.common.agent_driver_base.'
            'LoadBalancerManager.get_agent_hosting_loadbalancer',
            side_effect=(hosting_agent, None)
        ) as mock_get_agent_hosting_lb, mock.patch(
            'neutron_lbaas.drivers.common.agent_driver_base.'
            'LoadBalancerManager._unschedule_loadbalancer',
            side_effect=self.load_balancer._unschedule_loadbalancer
        ) as mock_unschedule_lb, mock.patch(
            'neutron_lbaas.drivers.common.agent_driver_base.'
            'LoadBalancerManager._schedule_loadbalancer',
            side_effect=self.load_balancer._schedule_loadbalancer
        ) as mock_schedule_lb:
            # rescheduling is expected to fail
            self.assertRaises(exceptions.LoadbalancerReschedulingFailed,
                              self.load_balancer.reschedule_loadbalancer,
                              self.adminContext, loadbalancer_data['id'])
            # check the usage of get_agent_hosting_loadbalancer()
            self.assertTrue(mock_get_agent_hosting_lb.called)
            mock_get_agent_hosting_lb.assert_called_with(
                self.adminContext, loadbalancer_data['id'])
            # check the usage of _unschedule_loadbalancer()
            self.assertTrue(mock_unschedule_lb.called)
            mock_unschedule_lb.assert_called_once_with(
                self.adminContext, loadbalancer_data['id'],
                hosting_agent['agent']['id'])
            # check the usage of _schedule_loadbalancer()
            self.assertTrue(mock_schedule_lb.called)
            mock_schedule_lb.assert_called_once_with(
                self.adminContext, loadbalancer_data['id'])
|
||||
|
||||
def test__schedule_loadbalancer(self):
    """_schedule_loadbalancer looks the LB up once and calls create()."""
    self._register_agent_states(lbaas_agents=True)
    with self.loadbalancer() as created:
        lb = created['loadbalancer']
        self.plugin.db.update_loadbalancer_provisioning_status(
            self.adminContext, lb['id'])
        with mock.patch(
                'neutron_lbaas.db.loadbalancer.loadbalancer_dbv2.'
                'LoadBalancerPluginDbv2.get_loadbalancer') as fake_get_lb:
            with mock.patch(
                    'neutron_lbaas.drivers.common.agent_driver_base.'
                    'LoadBalancerManager.create') as fake_create:
                self.load_balancer._schedule_loadbalancer(
                    self.adminContext, lb['id'])
                self.assertTrue(fake_get_lb.called)
                fake_get_lb.assert_called_once_with(self.adminContext,
                                                    lb['id'])
                self.assertTrue(fake_create.called)
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
features:
|
||||
- Adds the ability to automatically reschedule load
|
||||
balancers from LBaaS agents the server detects to have
|
||||
died. Previously, load balancers could be scheduled
|
||||
and realized across multiple LBaaS agents; however,
|
||||
if a hypervisor died, the load balancers scheduled
|
||||
to that node would cease operation. Now, these load
|
||||
balancers will be automatically rescheduled to a
|
||||
different agent. This feature is turned off by
|
||||
default and controlled via
|
||||
allow_automatic_lbaas_agent_failover
|
Loading…
Reference in New Issue