# Copyright 2014 Cisco Systems, Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. import random from keystoneclient import exceptions as k_exceptions from keystoneclient.v2_0 import client as k_client from oslo.config import cfg from oslo.utils import importutils from oslo.utils import timeutils from sqlalchemy.orm import exc from sqlalchemy.orm import joinedload from neutron.common import exceptions as n_exc from neutron.common import utils from neutron import context as neutron_context from neutron.db import agents_db from neutron.i18n import _LE, _LI, _LW from neutron import manager from neutron.openstack.common import log as logging from neutron.openstack.common import uuidutils from neutron.plugins.cisco.common import cisco_constants as c_constants from neutron.plugins.cisco.db.l3 import l3_models from neutron.plugins.cisco.l3 import service_vm_lib from neutron.plugins.common import constants as svc_constants LOG = logging.getLogger(__name__) DEVICE_HANDLING_OPTS = [ cfg.StrOpt('l3_admin_tenant', default='L3AdminTenant', help=_('Name of the L3 admin tenant.')), cfg.StrOpt('management_network', default='osn_mgmt_nw', help=_('Name of management network for device configuration. ' 'Default value is osn_mgmt_nw')), cfg.StrOpt('default_security_group', default='mgmt_sec_grp', help=_('Default security group applied on management port. ' 'Default value is mgmt_sec_grp.')), cfg.IntOpt('cfg_agent_down_time', default=60, help=_('Seconds of no status update until a cfg agent ' 'is considered down.')), cfg.BoolOpt('ensure_nova_running', default=True, help=_('Ensure that Nova is running before attempting to ' 'create any VM.')) ] CSR1KV_OPTS = [ cfg.StrOpt('csr1kv_image', default='csr1kv_openstack_img', help=_('Name of Glance image for CSR1kv.')), cfg.StrOpt('csr1kv_flavor', default=621, help=_('UUID of Nova flavor for CSR1kv.')), cfg.StrOpt('csr1kv_plugging_driver', default=('neutron.plugins.cisco.l3.plugging_drivers.' 'n1kv_trunking_driver.N1kvTrunkingPlugDriver'), help=_('Plugging driver for CSR1kv.')), cfg.StrOpt('csr1kv_device_driver', default=('neutron.plugins.cisco.l3.hosting_device_drivers.' 'csr1kv_hd_driver.CSR1kvHostingDeviceDriver'), help=_('Hosting device driver for CSR1kv.')), cfg.StrOpt('csr1kv_cfgagent_router_driver', default=('neutron.plugins.cisco.cfg_agent.device_drivers.' 'csr1kv.csr1kv_routing_driver.CSR1kvRoutingDriver'), help=_('Config agent driver for CSR1kv.')), cfg.IntOpt('csr1kv_booting_time', default=420, help=_('Booting time in seconds before a CSR1kv ' 'becomes operational.')), cfg.StrOpt('csr1kv_username', default='stack', help=_('Username to use for CSR1kv configurations.')), cfg.StrOpt('csr1kv_password', default='cisco', help=_('Password to use for CSR1kv configurations.')) ] cfg.CONF.register_opts(DEVICE_HANDLING_OPTS, "general") cfg.CONF.register_opts(CSR1KV_OPTS, "hosting_devices") class DeviceHandlingMixin(object): """A class implementing some functionality to handle devices.""" # The all-mighty tenant owning all hosting devices _l3_tenant_uuid = None # The management network for hosting devices _mgmt_nw_uuid = None _mgmt_sec_grp_id = None # Loaded driver modules for CSR1kv _hosting_device_driver = None _plugging_driver = None # Service VM manager object that interacts with Nova _svc_vm_mgr = None # Flag indicating is needed Nova services are reported as up. _nova_running = False @classmethod def l3_tenant_id(cls): """Returns id of tenant owning hosting device resources.""" if cls._l3_tenant_uuid is None: auth_url = cfg.CONF.keystone_authtoken.auth_uri user = cfg.CONF.keystone_authtoken.admin_user pw = cfg.CONF.keystone_authtoken.admin_password tenant = cfg.CONF.keystone_authtoken.admin_tenant_name keystone = k_client.Client(username=user, password=pw, tenant_name=tenant, auth_url=auth_url) try: tenant = keystone.tenants.find( name=cfg.CONF.general.l3_admin_tenant) cls._l3_tenant_uuid = tenant.id except k_exceptions.NotFound: LOG.error(_LE('No tenant with a name or ID of %s exists.'), cfg.CONF.general.l3_admin_tenant) except k_exceptions.NoUniqueMatch: LOG.error(_LE('Multiple tenants matches found for %s'), cfg.CONF.general.l3_admin_tenant) return cls._l3_tenant_uuid @classmethod def mgmt_nw_id(cls): """Returns id of the management network.""" if cls._mgmt_nw_uuid is None: tenant_id = cls.l3_tenant_id() if not tenant_id: return net = manager.NeutronManager.get_plugin().get_networks( neutron_context.get_admin_context(), {'tenant_id': [tenant_id], 'name': [cfg.CONF.general.management_network]}, ['id', 'subnets']) if len(net) == 1: num_subnets = len(net[0]['subnets']) if num_subnets == 0: LOG.error(_LE('The virtual management network has no ' 'subnet. Please assign one.')) return elif num_subnets > 1: LOG.info(_LI('The virtual management network has %d ' 'subnets. The first one will be used.'), num_subnets) cls._mgmt_nw_uuid = net[0].get('id') elif len(net) > 1: # Management network must have a unique name. LOG.error(_LE('The virtual management network does not have ' 'unique name. Please ensure that it is.')) else: # Management network has not been created. LOG.error(_LE('There is no virtual management network. Please ' 'create one.')) return cls._mgmt_nw_uuid @classmethod def mgmt_sec_grp_id(cls): """Returns id of security group used by the management network.""" if not utils.is_extension_supported( manager.NeutronManager.get_plugin(), "security-group"): return if cls._mgmt_sec_grp_id is None: # Get the id for the _mgmt_security_group_id tenant_id = cls.l3_tenant_id() res = manager.NeutronManager.get_plugin().get_security_groups( neutron_context.get_admin_context(), {'tenant_id': [tenant_id], 'name': [cfg.CONF.general.default_security_group]}, ['id']) if len(res) == 1: cls._mgmt_sec_grp_id = res[0].get('id') elif len(res) > 1: # the mgmt sec group must be unique. LOG.error(_LE('The security group for the virtual management ' 'network does not have unique name. Please ensure ' 'that it is.')) else: # CSR Mgmt security group is not present. LOG.error(_LE('There is no security group for the virtual ' 'management network. Please create one.')) return cls._mgmt_sec_grp_id @classmethod def get_hosting_device_driver(cls): """Returns device driver.""" if cls._hosting_device_driver: return cls._hosting_device_driver else: try: cls._hosting_device_driver = importutils.import_object( cfg.CONF.hosting_devices.csr1kv_device_driver) except (ImportError, TypeError, n_exc.NeutronException): LOG.exception(_LE('Error loading hosting device driver')) return cls._hosting_device_driver @classmethod def get_hosting_device_plugging_driver(cls): """Returns plugging driver.""" if cls._plugging_driver: return cls._plugging_driver else: try: cls._plugging_driver = importutils.import_object( cfg.CONF.hosting_devices.csr1kv_plugging_driver) except (ImportError, TypeError, n_exc.NeutronException): LOG.exception(_LE('Error loading plugging driver')) return cls._plugging_driver def get_hosting_devices_qry(self, context, hosting_device_ids, load_agent=True): """Returns hosting devices with .""" query = context.session.query(l3_models.HostingDevice) if load_agent: query = query.options(joinedload('cfg_agent')) if len(hosting_device_ids) > 1: query = query.filter(l3_models.HostingDevice.id.in_( hosting_device_ids)) else: query = query.filter(l3_models.HostingDevice.id == hosting_device_ids[0]) return query def handle_non_responding_hosting_devices(self, context, host, hosting_device_ids): with context.session.begin(subtransactions=True): e_context = context.elevated() hosting_devices = self.get_hosting_devices_qry( e_context, hosting_device_ids).all() # 'hosting_info' is dictionary with ids of removed hosting # devices and the affected logical resources for each # removed hosting device: # {'hd_id1': {'routers': [id1, id2, ...], # 'fw': [id1, ...], # ...}, # 'hd_id2': {'routers': [id3, id4, ...]}, # 'fw': [id1, ...], # ...}, # ...} hosting_info = dict((id, {}) for id in hosting_device_ids) try: #TODO(bobmel): Modify so service plugins register themselves self._handle_non_responding_hosting_devices( context, hosting_devices, hosting_info) except AttributeError: pass for hd in hosting_devices: if not self._process_non_responsive_hosting_device(e_context, hd): # exclude this device since we did not remove it del hosting_info[hd['id']] self.l3_cfg_rpc_notifier.hosting_devices_removed( context, hosting_info, False, host) def get_device_info_for_agent(self, hosting_device): """Returns information about needed by config agent. Convenience function that service plugins can use to populate their resources with information about the device hosting their logical resource. """ credentials = {'username': cfg.CONF.hosting_devices.csr1kv_username, 'password': cfg.CONF.hosting_devices.csr1kv_password} mgmt_ip = (hosting_device.management_port['fixed_ips'][0]['ip_address'] if hosting_device.management_port else None) return {'id': hosting_device.id, 'credentials': credentials, 'management_ip_address': mgmt_ip, 'protocol_port': hosting_device.protocol_port, 'created_at': str(hosting_device.created_at), 'booting_time': cfg.CONF.hosting_devices.csr1kv_booting_time, 'cfg_agent_id': hosting_device.cfg_agent_id} @classmethod def is_agent_down(cls, heart_beat_time, timeout=cfg.CONF.general.cfg_agent_down_time): return timeutils.is_older_than(heart_beat_time, timeout) def get_cfg_agents_for_hosting_devices(self, context, hosting_device_ids, admin_state_up=None, active=None, schedule=False): if not hosting_device_ids: return [] query = self.get_hosting_devices_qry(context, hosting_device_ids) if admin_state_up is not None: query = query.filter( agents_db.Agent.admin_state_up == admin_state_up) if schedule: agents = [] for hosting_device in query: if hosting_device.cfg_agent is None: agent = self._select_cfgagent(context, hosting_device) if agent is not None: agents.append(agent) else: agents.append(hosting_device.cfg_agent) else: agents = [hosting_device.cfg_agent for hosting_device in query if hosting_device.cfg_agent is not None] if active is not None: agents = [a for a in agents if not self.is_agent_down(a['heartbeat_timestamp'])] return agents def auto_schedule_hosting_devices(self, context, agent_host): """Schedules unassociated hosting devices to Cisco cfg agent. Schedules hosting devices to agent running on . """ with context.session.begin(subtransactions=True): # Check if there is a valid Cisco cfg agent on the host query = context.session.query(agents_db.Agent) query = query.filter_by(agent_type=c_constants.AGENT_TYPE_CFG, host=agent_host, admin_state_up=True) try: cfg_agent = query.one() except (exc.MultipleResultsFound, exc.NoResultFound): LOG.debug('No enabled Cisco cfg agent on host %s', agent_host) return False if self.is_agent_down( cfg_agent.heartbeat_timestamp): LOG.warning(_LW('Cisco cfg agent %s is not alive'), cfg_agent.id) query = context.session.query(l3_models.HostingDevice) query = query.filter_by(cfg_agent_id=None) for hd in query: hd.cfg_agent = cfg_agent context.session.add(hd) return True def _setup_device_handling(self): auth_url = cfg.CONF.keystone_authtoken.auth_uri u_name = cfg.CONF.keystone_authtoken.admin_user pw = cfg.CONF.keystone_authtoken.admin_password tenant = cfg.CONF.general.l3_admin_tenant self._svc_vm_mgr = service_vm_lib.ServiceVMManager( user=u_name, passwd=pw, l3_admin_tenant=tenant, auth_url=auth_url) def _process_non_responsive_hosting_device(self, context, hosting_device): """Host type specific processing of non responsive hosting devices. :param hosting_device: db object for hosting device :return: True if hosting_device has been deleted, otherwise False """ self._delete_service_vm_hosting_device(context, hosting_device) return True def _create_csr1kv_vm_hosting_device(self, context): """Creates a CSR1kv VM instance.""" # Note(bobmel): Nova does not handle VM dispatching well before all # its services have started. This creates problems for the Neutron # devstack script that creates a Neutron router, which in turn # triggers service VM dispatching. # Only perform pool maintenance if needed Nova services have started if (cfg.CONF.general.ensure_nova_running and not self._nova_running): if self._svc_vm_mgr.nova_services_up(): self.__class__._nova_running = True else: LOG.info(_LI('Not all Nova services are up and running. ' 'Skipping this CSR1kv vm create request.')) return plugging_drv = self.get_hosting_device_plugging_driver() hosting_device_drv = self.get_hosting_device_driver() if plugging_drv is None or hosting_device_drv is None: return # These resources are owned by the L3AdminTenant complementary_id = uuidutils.generate_uuid() dev_data = {'complementary_id': complementary_id, 'device_id': 'CSR1kv', 'admin_state_up': True, 'protocol_port': 22, 'created_at': timeutils.utcnow()} res = plugging_drv.create_hosting_device_resources( context, complementary_id, self.l3_tenant_id(), self.mgmt_nw_id(), self.mgmt_sec_grp_id(), 1) if res.get('mgmt_port') is None: # Required ports could not be created return vm_instance = self._svc_vm_mgr.dispatch_service_vm( context, 'CSR1kv_nrouter', cfg.CONF.hosting_devices.csr1kv_image, cfg.CONF.hosting_devices.csr1kv_flavor, hosting_device_drv, res['mgmt_port'], res.get('ports')) with context.session.begin(subtransactions=True): if vm_instance is not None: dev_data.update( {'id': vm_instance['id'], 'management_port_id': res['mgmt_port']['id']}) hosting_device = self._create_hosting_device( context, {'hosting_device': dev_data}) else: # Fundamental error like could not contact Nova # Cleanup anything we created plugging_drv.delete_hosting_device_resources( context, self.l3_tenant_id(), **res) return LOG.info(_LI('Created a CSR1kv hosting device VM')) return hosting_device def _delete_service_vm_hosting_device(self, context, hosting_device): """Deletes a service VM. This will indirectly make all of its hosted resources unscheduled. """ if hosting_device is None: return plugging_drv = self.get_hosting_device_plugging_driver() if plugging_drv is None: return res = plugging_drv.get_hosting_device_resources( context, hosting_device['id'], hosting_device['complementary_id'], self.l3_tenant_id(), self.mgmt_nw_id()) if not self._svc_vm_mgr.delete_service_vm(context, hosting_device['id']): LOG.error(_LE('Failed to delete hosting device %s service VM. ' 'Will un-register it anyway.'), hosting_device['id']) plugging_drv.delete_hosting_device_resources( context, self.l3_tenant_id(), **res) with context.session.begin(subtransactions=True): context.session.delete(hosting_device) def _create_hosting_device(self, context, hosting_device): LOG.debug('create_hosting_device() called') hd = hosting_device['hosting_device'] tenant_id = self._get_tenant_id_for_create(context, hd) with context.session.begin(subtransactions=True): hd_db = l3_models.HostingDevice( id=hd.get('id') or uuidutils.generate_uuid(), complementary_id=hd.get('complementary_id'), tenant_id=tenant_id, device_id=hd.get('device_id'), admin_state_up=hd.get('admin_state_up', True), management_port_id=hd['management_port_id'], protocol_port=hd.get('protocol_port'), cfg_agent_id=hd.get('cfg_agent_id'), created_at=hd.get('created_at', timeutils.utcnow()), status=hd.get('status', svc_constants.ACTIVE)) context.session.add(hd_db) return hd_db def _select_cfgagent(self, context, hosting_device): """Selects Cisco cfg agent that will configure .""" if not hosting_device: LOG.debug('Hosting device to schedule not specified') return elif hosting_device.cfg_agent: LOG.debug('Hosting device %(hd_id)s has already been ' 'assigned to Cisco cfg agent %(agent_id)s', {'hd_id': id, 'agent_id': hosting_device.cfg_agent.id}) return with context.session.begin(subtransactions=True): active_cfg_agents = self._get_cfg_agents(context, active=True) if not active_cfg_agents: LOG.warning(_LW('There are no active Cisco cfg agents')) # No worries, once a Cisco cfg agent is started and # announces itself any "dangling" hosting devices # will be scheduled to it. return chosen_agent = random.choice(active_cfg_agents) hosting_device.cfg_agent = chosen_agent context.session.add(hosting_device) return chosen_agent def _get_cfg_agents(self, context, active=None, filters=None): query = context.session.query(agents_db.Agent) query = query.filter( agents_db.Agent.agent_type == c_constants.AGENT_TYPE_CFG) if active is not None: query = (query.filter(agents_db.Agent.admin_state_up == active)) if filters: for key, value in filters.iteritems(): column = getattr(agents_db.Agent, key, None) if column: query = query.filter(column.in_(value)) cfg_agents = query.all() if active is not None: cfg_agents = [cfg_agent for cfg_agent in cfg_agents if not self.is_agent_down( cfg_agent['heartbeat_timestamp'])] return cfg_agents