diff --git a/nova/api/metadata/base.py b/nova/api/metadata/base.py new file mode 100644 index 00000000..f83bcf8e --- /dev/null +++ b/nova/api/metadata/base.py @@ -0,0 +1,759 @@ +# Copyright 2010 United States Government as represented by the +# Administrator of the National Aeronautics and Space Administration. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Instance Metadata information.""" + +import os +import posixpath + +from oslo_log import log as logging +from oslo_serialization import base64 +from oslo_serialization import jsonutils +from oslo_utils import importutils +from oslo_utils import timeutils +import six + +from nova.api.ec2 import ec2utils +from nova.api.metadata import password +from nova.api.metadata import vendordata +from nova.api.metadata import vendordata_dynamic +from nova.api.metadata import vendordata_json +from nova import block_device +from nova.cells import opts as cells_opts +from nova.cells import rpcapi as cells_rpcapi +import nova.conf +from nova import context +from nova import exception +from nova import network +from nova.network.security_group import openstack_driver +from nova import objects +from nova.objects import virt_device_metadata as metadata_obj +from nova import utils +from nova.virt import netutils + + +CONF = nova.conf.CONF + +VERSIONS = [ + '1.0', + '2007-01-19', + '2007-03-01', + '2007-08-29', + '2007-10-10', + '2007-12-15', + '2008-02-01', + '2008-09-01', + '2009-04-04', +] + +# NOTE(mikal): think of these strings as version numbers. They traditionally +# correlate with OpenStack release dates, with all the changes for a given +# release bundled into a single version. Note that versions in the future are +# hidden from the listing, but can still be requested explicitly, which is +# required for testing purposes. We know this isn't great, but its inherited +# from EC2, which this needs to be compatible with. +FOLSOM = '2012-08-10' +GRIZZLY = '2013-04-04' +HAVANA = '2013-10-17' +LIBERTY = '2015-10-15' +NEWTON_ONE = '2016-06-30' +NEWTON_TWO = '2016-10-06' +OCATA = '2017-02-22' + +OPENSTACK_VERSIONS = [ + FOLSOM, + GRIZZLY, + HAVANA, + LIBERTY, + NEWTON_ONE, + NEWTON_TWO, + OCATA, +] + +VERSION = "version" +CONTENT = "content" +CONTENT_DIR = "content" +MD_JSON_NAME = "meta_data.json" +VD_JSON_NAME = "vendor_data.json" +VD2_JSON_NAME = "vendor_data2.json" +NW_JSON_NAME = "network_data.json" +UD_NAME = "user_data" +PASS_NAME = "password" +MIME_TYPE_TEXT_PLAIN = "text/plain" +MIME_TYPE_APPLICATION_JSON = "application/json" + +LOG = logging.getLogger(__name__) + + +class InvalidMetadataVersion(Exception): + pass + + +class InvalidMetadataPath(Exception): + pass + + +class InstanceMetadata(object): + """Instance metadata.""" + + def __init__(self, instance, address=None, content=None, extra_md=None, + network_info=None, vd_driver=None, network_metadata=None, + request_context=None): + """Creation of this object should basically cover all time consuming + collection. 
Methods after that should not cause time delays due to + network operations or lengthy cpu operations. + + The user should then get a single instance and make multiple method + calls on it. + """ + if not content: + content = [] + + ctxt = context.get_admin_context() + + # NOTE(danms): Sanitize the instance to limit the amount of stuff + # inside that may not pickle well (i.e. context). We also touch + # some of the things we'll lazy load later to make sure we keep their + # values in what we cache. + instance.ec2_ids + instance.keypairs + instance.device_metadata + instance = objects.Instance.obj_from_primitive( + instance.obj_to_primitive()) + + # The default value of mimeType is set to MIME_TYPE_TEXT_PLAIN + self.set_mimetype(MIME_TYPE_TEXT_PLAIN) + self.instance = instance + self.extra_md = extra_md + + self.availability_zone = instance.get('availability_zone') + + secgroup_api = openstack_driver.get_openstack_security_group_driver() + self.security_groups = secgroup_api.get_instance_security_groups( + ctxt, instance) + + self.mappings = _format_instance_mapping(ctxt, instance) + + if instance.user_data is not None: + self.userdata_raw = base64.decode_as_bytes(instance.user_data) + else: + self.userdata_raw = None + + self.address = address + + # expose instance metadata. + self.launch_metadata = utils.instance_meta(instance) + + self.password = password.extract_password(instance) + + self.uuid = instance.uuid + + self.content = {} + self.files = [] + + # get network info, and the rendered network template + if network_info is None: + network_info = instance.info_cache.network_info + + # expose network metadata + if network_metadata is None: + self.network_metadata = netutils.get_network_metadata(network_info) + else: + self.network_metadata = network_metadata + + self.ip_info = \ + ec2utils.get_ip_info_for_instance_from_nw_info(network_info) + + self.network_config = None + cfg = netutils.get_injected_network_template(network_info) + + if cfg: + key = "%04i" % len(self.content) + self.content[key] = cfg + self.network_config = {"name": "network_config", + 'content_path': "/%s/%s" % (CONTENT_DIR, key)} + + # 'content' is passed in from the configdrive code in + # nova/virt/libvirt/driver.py. That's how we get the injected files + # (personalities) in. AFAIK they're not stored in the db at all, + # so are not available later (web service metadata time). + for (path, contents) in content: + key = "%04i" % len(self.content) + self.files.append({'path': path, + 'content_path': "/%s/%s" % (CONTENT_DIR, key)}) + self.content[key] = contents + + if vd_driver is None: + vdclass = importutils.import_class(CONF.vendordata_driver) + else: + vdclass = vd_driver + + self.vddriver = vdclass(instance=instance, address=address, + extra_md=extra_md, network_info=network_info) + + self.route_configuration = None + + # NOTE(mikal): the decision to not pass extra_md here like we + # do to the StaticJSON driver is deliberate. extra_md will + # contain the admin password for the instance, and we shouldn't + # pass that to external services. 
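
For context on how these providers get consumed, here is a standalone sketch (outside this patch) of the merge that `_vendor_data2()` performs further down: StaticJSON output is namespaced under `static`, dynamic providers are merged flat, and duplicate keys are dropped. `StaticProvider` and `DynamicProvider` are invented stand-ins for `JsonFileVendorData` and `DynamicVendorData`, and their return values are made up.

```python
import json


class StaticProvider(object):
    """Stand-in for vendordata_json.JsonFileVendorData."""
    def get(self):
        return {'custom': {'owner': 'ops-team'}}


class DynamicProvider(object):
    """Stand-in for vendordata_dynamic.DynamicVendorData."""
    def get(self):
        # The real driver fetches this from an external REST service.
        return {'dynamic-target': {'role': 'webserver'}}


def build_vendor_data2(providers):
    """Merge provider output the way _vendor_data2() does: StaticJSON is
    namespaced under 'static', dynamic providers are merged flat, and
    duplicate keys are dropped rather than overwritten."""
    merged = {}
    for name, provider in providers.items():
        if name == 'StaticJSON':
            merged['static'] = provider.get()
        else:
            values = provider.get()
            for key in list(values):
                if key in merged:
                    del values[key]
            merged.update(values)
    return json.dumps(merged)


print(build_vendor_data2({'StaticJSON': StaticProvider(),
                          'DynamicJSON': DynamicProvider()}))
```
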
+ self.vendordata_providers = { + 'StaticJSON': vendordata_json.JsonFileVendorData( + instance=instance, address=address, + extra_md=extra_md, network_info=network_info), + 'DynamicJSON': vendordata_dynamic.DynamicVendorData( + instance=instance, address=address, + network_info=network_info, context=request_context) + } + + def _route_configuration(self): + if self.route_configuration: + return self.route_configuration + + path_handlers = {UD_NAME: self._user_data, + PASS_NAME: self._password, + VD_JSON_NAME: self._vendor_data, + VD2_JSON_NAME: self._vendor_data2, + MD_JSON_NAME: self._metadata_as_json, + NW_JSON_NAME: self._network_data, + VERSION: self._handle_version, + CONTENT: self._handle_content} + + self.route_configuration = RouteConfiguration(path_handlers) + return self.route_configuration + + def set_mimetype(self, mime_type): + self.md_mimetype = mime_type + + def get_mimetype(self): + return self.md_mimetype + + def get_ec2_metadata(self, version): + if version == "latest": + version = VERSIONS[-1] + + if version not in VERSIONS: + raise InvalidMetadataVersion(version) + + hostname = self._get_hostname() + + floating_ips = self.ip_info['floating_ips'] + floating_ip = floating_ips and floating_ips[0] or '' + + fixed_ips = self.ip_info['fixed_ips'] + fixed_ip = fixed_ips and fixed_ips[0] or '' + + fmt_sgroups = [x['name'] for x in self.security_groups] + + meta_data = { + 'ami-id': self.instance.ec2_ids.ami_id, + 'ami-launch-index': self.instance.launch_index, + 'ami-manifest-path': 'FIXME', + 'instance-id': self.instance.ec2_ids.instance_id, + 'hostname': hostname, + 'local-ipv4': fixed_ip or self.address, + 'reservation-id': self.instance.reservation_id, + 'security-groups': fmt_sgroups} + + # public keys are strangely rendered in ec2 metadata service + # meta-data/public-keys/ returns '0=keyname' (with no trailing /) + # and only if there is a public key given. 
+ # '0=keyname' means there is a normally rendered dict at + # meta-data/public-keys/0 + # + # meta-data/public-keys/ : '0=%s' % keyname + # meta-data/public-keys/0/ : 'openssh-key' + # meta-data/public-keys/0/openssh-key : '%s' % publickey + if self.instance.key_name: + meta_data['public-keys'] = { + '0': {'_name': "0=" + self.instance.key_name, + 'openssh-key': self.instance.key_data}} + + if self._check_version('2007-01-19', version): + meta_data['local-hostname'] = hostname + meta_data['public-hostname'] = hostname + meta_data['public-ipv4'] = floating_ip + + if False and self._check_version('2007-03-01', version): + # TODO(vish): store product codes + meta_data['product-codes'] = [] + + if self._check_version('2007-08-29', version): + instance_type = self.instance.get_flavor() + meta_data['instance-type'] = instance_type['name'] + + if False and self._check_version('2007-10-10', version): + # TODO(vish): store ancestor ids + meta_data['ancestor-ami-ids'] = [] + + if self._check_version('2007-12-15', version): + meta_data['block-device-mapping'] = self.mappings + if self.instance.ec2_ids.kernel_id: + meta_data['kernel-id'] = self.instance.ec2_ids.kernel_id + if self.instance.ec2_ids.ramdisk_id: + meta_data['ramdisk-id'] = self.instance.ec2_ids.ramdisk_id + + if self._check_version('2008-02-01', version): + meta_data['placement'] = {'availability-zone': + self.availability_zone} + + if self._check_version('2008-09-01', version): + meta_data['instance-action'] = 'none' + + data = {'meta-data': meta_data} + if self.userdata_raw is not None: + data['user-data'] = self.userdata_raw + + return data + + def get_ec2_item(self, path_tokens): + # get_ec2_metadata returns dict without top level version + data = self.get_ec2_metadata(path_tokens[0]) + return find_path_in_tree(data, path_tokens[1:]) + + def get_openstack_item(self, path_tokens): + if path_tokens[0] == CONTENT_DIR: + return self._handle_content(path_tokens) + return self._route_configuration().handle_path(path_tokens) + + def _metadata_as_json(self, version, path): + metadata = {'uuid': self.uuid} + if self.launch_metadata: + metadata['meta'] = self.launch_metadata + if self.files: + metadata['files'] = self.files + if self.extra_md: + metadata.update(self.extra_md) + if self.network_config: + metadata['network_config'] = self.network_config + + if self.instance.key_name: + if cells_opts.get_cell_type() == 'compute': + cells_api = cells_rpcapi.CellsAPI() + try: + keypair = cells_api.get_keypair_at_top( + context.get_admin_context(), self.instance.user_id, + self.instance.key_name) + except exception.KeypairNotFound: + # NOTE(lpigueir): If keypair was deleted, treat + # it like it never had any + keypair = None + else: + keypairs = self.instance.keypairs + # NOTE(mriedem): It's possible for the keypair to be deleted + # before it was migrated to the instance_extra table, in which + # case lazy-loading instance.keypairs will handle the 404 and + # just set an empty KeyPairList object on the instance. 
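
The EC2 public-keys layout described earlier in this hunk is easier to see with a small standalone sketch; `render()` condenses the dict handling of `ec2_md_print()` defined near the end of this file, and the key name and key material are invented.

```python
def render(data):
    """Minimal copy of ec2_md_print()'s dict handling."""
    if isinstance(data, dict):
        lines = []
        for key in sorted(data.keys()):
            if key == '_name':
                continue
            value = data[key]
            if isinstance(value, dict):
                lines.append(str(value['_name']) if '_name' in value
                             else key + '/')
            else:
                lines.append(key)
        return '\n'.join(lines)
    return str(data)


public_keys = {'0': {'_name': '0=mykey',
                     'openssh-key': 'ssh-rsa AAAA... user@host'}}

print(render(public_keys))              # meta-data/public-keys/   -> "0=mykey"
print(render(public_keys['0']))         # meta-data/public-keys/0/ -> "openssh-key"
print(public_keys['0']['openssh-key'])  # .../0/openssh-key        -> the key itself
```
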
+ keypair = keypairs[0] if keypairs else None + + if keypair: + metadata['public_keys'] = { + keypair.name: keypair.public_key, + } + + metadata['keys'] = [ + {'name': keypair.name, + 'type': keypair.type, + 'data': keypair.public_key} + ] + else: + LOG.debug("Unable to find keypair for instance with " + "key name '%s'.", self.instance.key_name, + instance=self.instance) + + metadata['hostname'] = self._get_hostname() + metadata['name'] = self.instance.display_name + metadata['launch_index'] = self.instance.launch_index + metadata['availability_zone'] = self.availability_zone + + if self._check_os_version(GRIZZLY, version): + metadata['random_seed'] = base64.encode_as_text(os.urandom(512)) + + if self._check_os_version(LIBERTY, version): + metadata['project_id'] = self.instance.project_id + + if self._check_os_version(NEWTON_ONE, version): + metadata['devices'] = self._get_device_metadata(version) + + self.set_mimetype(MIME_TYPE_APPLICATION_JSON) + return jsonutils.dump_as_bytes(metadata) + + def _get_device_metadata(self, version): + """Build a device metadata dict based on the metadata objects. This is + done here in the metadata API as opposed to in the objects themselves + because the metadata dict is part of the guest API and thus must be + controlled. + """ + device_metadata_list = [] + vif_vlans_supported = self._check_os_version(OCATA, version) + if self.instance.device_metadata is not None: + for device in self.instance.device_metadata.devices: + device_metadata = {} + bus = 'none' + address = 'none' + + if 'bus' in device: + # TODO(artom/mriedem) It would be nice if we had something + # more generic, like a type identifier or something, built + # into these types of objects, like a get_meta_type() + # abstract method on the base DeviceBus class. 
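
A quick standalone sketch of the table-driven alternative the TODO above hints at, using trivial stand-in classes rather than the real virt_device_metadata objects; this is only an illustration of the idea, not what this patch implements.

```python
class PCIDeviceBus(object):
    pass


class USBDeviceBus(object):
    pass


class SCSIDeviceBus(object):
    pass


# One lookup table instead of a chain of isinstance checks.
BUS_TYPES = {PCIDeviceBus: 'pci', USBDeviceBus: 'usb', SCSIDeviceBus: 'scsi'}


def bus_name(bus):
    for cls, name in BUS_TYPES.items():
        if isinstance(bus, cls):
            return name
    return None  # unknown bus: caller skips the device, as the loop below does


print(bus_name(USBDeviceBus()))  # 'usb'
print(bus_name(object()))        # None
```
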
+ if isinstance(device.bus, metadata_obj.PCIDeviceBus): + bus = 'pci' + elif isinstance(device.bus, metadata_obj.USBDeviceBus): + bus = 'usb' + elif isinstance(device.bus, metadata_obj.SCSIDeviceBus): + bus = 'scsi' + elif isinstance(device.bus, metadata_obj.IDEDeviceBus): + bus = 'ide' + elif isinstance(device.bus, metadata_obj.XenDeviceBus): + bus = 'xen' + else: + LOG.debug('Metadata for device with unknown bus %s ' + 'has not been included in the ' + 'output', device.bus.__class__.__name__) + continue + if 'address' in device.bus: + address = device.bus.address + + if isinstance(device, metadata_obj.NetworkInterfaceMetadata): + vlan = None + if vif_vlans_supported and 'vlan' in device: + vlan = device.vlan + + # Skip devices without tags on versions that + # don't support vlans + if not (vlan or 'tags' in device): + continue + + device_metadata['type'] = 'nic' + device_metadata['mac'] = device.mac + if vlan: + device_metadata['vlan'] = vlan + elif isinstance(device, metadata_obj.DiskMetadata): + device_metadata['type'] = 'disk' + # serial and path are optional parameters + if 'serial' in device: + device_metadata['serial'] = device.serial + if 'path' in device: + device_metadata['path'] = device.path + else: + LOG.debug('Metadata for device of unknown type %s has not ' + 'been included in the ' + 'output', device.__class__.__name__) + continue + + device_metadata['bus'] = bus + device_metadata['address'] = address + if 'tags' in device: + device_metadata['tags'] = device.tags + + device_metadata_list.append(device_metadata) + return device_metadata_list + + def _handle_content(self, path_tokens): + if len(path_tokens) == 1: + raise KeyError("no listing for %s" % "/".join(path_tokens)) + if len(path_tokens) != 2: + raise KeyError("Too many tokens for /%s" % CONTENT_DIR) + return self.content[path_tokens[1]] + + def _handle_version(self, version, path): + # request for /version, give a list of what is available + ret = [MD_JSON_NAME] + if self.userdata_raw is not None: + ret.append(UD_NAME) + if self._check_os_version(GRIZZLY, version): + ret.append(PASS_NAME) + if self._check_os_version(HAVANA, version): + ret.append(VD_JSON_NAME) + if self._check_os_version(LIBERTY, version): + ret.append(NW_JSON_NAME) + if self._check_os_version(NEWTON_TWO, version): + ret.append(VD2_JSON_NAME) + + return ret + + def _user_data(self, version, path): + if self.userdata_raw is None: + raise KeyError(path) + return self.userdata_raw + + def _network_data(self, version, path): + if self.network_metadata is None: + return jsonutils.dump_as_bytes({}) + return jsonutils.dump_as_bytes(self.network_metadata) + + def _password(self, version, path): + if self._check_os_version(GRIZZLY, version): + return password.handle_password + raise KeyError(path) + + def _vendor_data(self, version, path): + if self._check_os_version(HAVANA, version): + self.set_mimetype(MIME_TYPE_APPLICATION_JSON) + + # NOTE(mikal): backwards compatibility... If the deployer has + # specified providers, and one of those providers is StaticJSON, + # then do that thing here. Otherwise, if the deployer has + # specified an old style driver here, then use that. This second + # bit can be removed once old style vendordata is fully deprecated + # and removed. + if (CONF.api.vendordata_providers and + 'StaticJSON' in CONF.api.vendordata_providers): + return jsonutils.dump_as_bytes( + self.vendordata_providers['StaticJSON'].get()) + else: + # TODO(mikal): when we removed the old style vendordata + # drivers, we need to remove self.vddriver as well. 
+ return jsonutils.dump_as_bytes(self.vddriver.get()) + + raise KeyError(path) + + def _vendor_data2(self, version, path): + if self._check_os_version(NEWTON_TWO, version): + self.set_mimetype(MIME_TYPE_APPLICATION_JSON) + + j = {} + for provider in CONF.api.vendordata_providers: + if provider == 'StaticJSON': + j['static'] = self.vendordata_providers['StaticJSON'].get() + else: + values = self.vendordata_providers[provider].get() + for key in list(values): + if key in j: + LOG.warning('Removing duplicate metadata key: %s', + key, instance=self.instance) + del values[key] + j.update(values) + + return jsonutils.dump_as_bytes(j) + + raise KeyError(path) + + def _check_version(self, required, requested, versions=VERSIONS): + return versions.index(requested) >= versions.index(required) + + def _check_os_version(self, required, requested): + return self._check_version(required, requested, OPENSTACK_VERSIONS) + + def _get_hostname(self): + return "%s%s%s" % (self.instance.hostname, + '.' if CONF.dhcp_domain else '', + CONF.dhcp_domain) + + def lookup(self, path): + if path == "" or path[0] != "/": + path = posixpath.normpath("/" + path) + else: + path = posixpath.normpath(path) + + # Set default mimeType. It will be modified only if there is a change + self.set_mimetype(MIME_TYPE_TEXT_PLAIN) + + # fix up requests, prepending /ec2 to anything that does not match + path_tokens = path.split('/')[1:] + if path_tokens[0] not in ("ec2", "openstack"): + if path_tokens[0] == "": + # request for / + path_tokens = ["ec2"] + else: + path_tokens = ["ec2"] + path_tokens + path = "/" + "/".join(path_tokens) + + # all values of 'path' input starts with '/' and have no trailing / + + # specifically handle the top level request + if len(path_tokens) == 1: + if path_tokens[0] == "openstack": + # NOTE(vish): don't show versions that are in the future + today = timeutils.utcnow().strftime("%Y-%m-%d") + versions = [v for v in OPENSTACK_VERSIONS if v <= today] + if OPENSTACK_VERSIONS != versions: + LOG.debug("future versions %s hidden in version list", + [v for v in OPENSTACK_VERSIONS + if v not in versions], instance=self.instance) + versions += ["latest"] + else: + versions = VERSIONS + ["latest"] + return versions + + try: + if path_tokens[0] == "openstack": + data = self.get_openstack_item(path_tokens[1:]) + else: + data = self.get_ec2_item(path_tokens[1:]) + except (InvalidMetadataVersion, KeyError): + raise InvalidMetadataPath(path) + + return data + + def metadata_for_config_drive(self): + """Yields (path, value) tuples for metadata elements.""" + # EC2 style metadata + for version in VERSIONS + ["latest"]: + if version in CONF.api.config_drive_skip_versions.split(' '): + continue + + data = self.get_ec2_metadata(version) + if 'user-data' in data: + filepath = os.path.join('ec2', version, 'user-data') + yield (filepath, data['user-data']) + del data['user-data'] + + try: + del data['public-keys']['0']['_name'] + except KeyError: + pass + + filepath = os.path.join('ec2', version, 'meta-data.json') + yield (filepath, jsonutils.dump_as_bytes(data['meta-data'])) + + ALL_OPENSTACK_VERSIONS = OPENSTACK_VERSIONS + ["latest"] + for version in ALL_OPENSTACK_VERSIONS: + path = 'openstack/%s/%s' % (version, MD_JSON_NAME) + yield (path, self.lookup(path)) + + path = 'openstack/%s/%s' % (version, UD_NAME) + if self.userdata_raw is not None: + yield (path, self.lookup(path)) + + if self._check_version(HAVANA, version, ALL_OPENSTACK_VERSIONS): + path = 'openstack/%s/%s' % (version, VD_JSON_NAME) + yield (path, 
self.lookup(path)) + + if self._check_version(LIBERTY, version, ALL_OPENSTACK_VERSIONS): + path = 'openstack/%s/%s' % (version, NW_JSON_NAME) + yield (path, self.lookup(path)) + + if self._check_version(NEWTON_TWO, version, + ALL_OPENSTACK_VERSIONS): + path = 'openstack/%s/%s' % (version, VD2_JSON_NAME) + yield (path, self.lookup(path)) + + for (cid, content) in self.content.items(): + yield ('%s/%s/%s' % ("openstack", CONTENT_DIR, cid), content) + + +class RouteConfiguration(object): + """Routes metadata paths to request handlers.""" + + def __init__(self, path_handler): + self.path_handlers = path_handler + + def _version(self, version): + if version == "latest": + version = OPENSTACK_VERSIONS[-1] + + if version not in OPENSTACK_VERSIONS: + raise InvalidMetadataVersion(version) + + return version + + def handle_path(self, path_tokens): + version = self._version(path_tokens[0]) + if len(path_tokens) == 1: + path = VERSION + else: + path = '/'.join(path_tokens[1:]) + + path_handler = self.path_handlers[path] + + if path_handler is None: + raise KeyError(path) + + return path_handler(version, path) + + +def get_metadata_by_address(address): + ctxt = context.get_admin_context() + fixed_ip = network.API().get_fixed_ip_by_address(ctxt, address) + LOG.info('Fixed IP %(ip)s translates to instance UUID %(uuid)s', + {'ip': address, 'uuid': fixed_ip['instance_uuid']}) + + return get_metadata_by_instance_id(fixed_ip['instance_uuid'], + address, + ctxt) + + +def get_metadata_by_instance_id(instance_id, address, ctxt=None): + ctxt = ctxt or context.get_admin_context() + attrs = ['ec2_ids', 'flavor', 'info_cache', + 'metadata', 'system_metadata', + 'security_groups', 'keypairs', + 'device_metadata'] + try: + im = objects.InstanceMapping.get_by_instance_uuid(ctxt, instance_id) + except exception.InstanceMappingNotFound: + LOG.warning('Instance mapping for %(uuid)s not found; ' + 'cell setup is incomplete', {'uuid': instance_id}) + instance = objects.Instance.get_by_uuid(ctxt, instance_id, + expected_attrs=attrs) + return InstanceMetadata(instance, address) + + with context.target_cell(ctxt, im.cell_mapping) as cctxt: + instance = objects.Instance.get_by_uuid(cctxt, instance_id, + expected_attrs=attrs) + return InstanceMetadata(instance, address) + + +def _format_instance_mapping(ctxt, instance): + bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( + ctxt, instance.uuid) + return block_device.instance_block_mapping(instance, bdms) + + +def ec2_md_print(data): + if isinstance(data, dict): + output = '' + for key in sorted(data.keys()): + if key == '_name': + continue + if isinstance(data[key], dict): + if '_name' in data[key]: + output += str(data[key]['_name']) + else: + output += key + '/' + else: + output += key + + output += '\n' + return output[:-1] + elif isinstance(data, list): + return '\n'.join(data) + elif isinstance(data, (bytes, six.text_type)): + return data + else: + return str(data) + + +def find_path_in_tree(data, path_tokens): + # given a dict/list tree, and a path in that tree, return data found there. + for i in range(0, len(path_tokens)): + if isinstance(data, dict) or isinstance(data, list): + if path_tokens[i] in data: + data = data[path_tokens[i]] + else: + raise KeyError("/".join(path_tokens[0:i])) + else: + if i != len(path_tokens) - 1: + raise KeyError("/".join(path_tokens[0:i])) + data = data[path_tokens[i]] + return data + + +# NOTE(mikal): this alias is to stop old style vendordata plugins from breaking +# post refactor. 
It should be removed when we finish deprecating those plugins. +VendorDataDriver = vendordata.VendorDataDriver diff --git a/nova/console/websocketproxy.py b/nova/console/websocketproxy.py new file mode 100644 index 00000000..245560ad --- /dev/null +++ b/nova/console/websocketproxy.py @@ -0,0 +1,185 @@ +# Copyright (c) 2012 OpenStack Foundation +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +''' +Websocket proxy that is compatible with OpenStack Nova. +Leverages websockify.py by Joel Martin +''' + +import socket +import sys + +from oslo_log import log as logging +from six.moves import http_cookies as Cookie +import six.moves.urllib.parse as urlparse +import websockify + +import nova.conf +from nova.consoleauth import rpcapi as consoleauth_rpcapi +from nova import context +from nova import exception +from nova.i18n import _ + +LOG = logging.getLogger(__name__) + +CONF = nova.conf.CONF + + +class NovaProxyRequestHandlerBase(object): + def address_string(self): + # NOTE(rpodolyaka): override the superclass implementation here and + # explicitly disable the reverse DNS lookup, which might fail on some + # deployments due to DNS configuration and break VNC access completely + return str(self.client_address[0]) + + def verify_origin_proto(self, connection_info, origin_proto): + access_url = connection_info.get('access_url') + if not access_url: + detail = _("No access_url in connection_info. " + "Cannot validate protocol") + raise exception.ValidationError(detail=detail) + expected_protos = [urlparse.urlparse(access_url).scheme] + # NOTE: For serial consoles the expected protocol could be ws or + # wss which correspond to http and https respectively in terms of + # security. 
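
A standalone sketch of the scheme mapping that `verify_origin_proto()` applies, using an invented serial-console access_url.

```python
import six.moves.urllib.parse as urlparse


def expected_protos_for(access_url):
    """Mirror verify_origin_proto(): accept the access_url scheme plus its
    plain HTTP(S) counterpart for serial consoles that use ws/wss."""
    protos = [urlparse.urlparse(access_url).scheme]
    if 'ws' in protos:
        protos.append('http')
    if 'wss' in protos:
        protos.append('https')
    return protos


# Invented access_url; a browser sending Origin: https://... is accepted.
print(expected_protos_for('wss://console.example.com:6083/'))              # ['wss', 'https']
print('https' in expected_protos_for('wss://console.example.com:6083/'))   # True
```
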
+ if 'ws' in expected_protos: + expected_protos.append('http') + if 'wss' in expected_protos: + expected_protos.append('https') + + return origin_proto in expected_protos + + def new_websocket_client(self): + """Called after a new WebSocket connection has been established.""" + # Reopen the eventlet hub to make sure we don't share an epoll + # fd with parent and/or siblings, which would be bad + from eventlet import hubs + hubs.use_hub() + + # The nova expected behavior is to have token + # passed to the method GET of the request + parse = urlparse.urlparse(self.path) + if parse.scheme not in ('http', 'https'): + # From a bug in urlparse in Python < 2.7.4 we cannot support + # special schemes (cf: http://bugs.python.org/issue9374) + if sys.version_info < (2, 7, 4): + raise exception.NovaException( + _("We do not support scheme '%s' under Python < 2.7.4, " + "please use http or https") % parse.scheme) + + query = parse.query + token = urlparse.parse_qs(query).get("token", [""]).pop() + if not token: + # NoVNC uses it's own convention that forward token + # from the request to a cookie header, we should check + # also for this behavior + hcookie = self.headers.get('cookie') + if hcookie: + cookie = Cookie.SimpleCookie() + for hcookie_part in hcookie.split(';'): + hcookie_part = hcookie_part.lstrip() + try: + cookie.load(hcookie_part) + except Cookie.CookieError: + # NOTE(stgleb): Do not print out cookie content + # for security reasons. + LOG.warning('Found malformed cookie') + else: + if 'token' in cookie: + token = cookie['token'].value + + ctxt = context.get_admin_context() + rpcapi = consoleauth_rpcapi.ConsoleAuthAPI() + connect_info = rpcapi.check_token(ctxt, token=token) + + if not connect_info: + raise exception.InvalidToken(token=token) + + # Verify Origin + expected_origin_hostname = self.headers.get('Host') + if ':' in expected_origin_hostname: + e = expected_origin_hostname + if '[' in e and ']' in e: + expected_origin_hostname = e.split(']')[0][1:] + else: + expected_origin_hostname = e.split(':')[0] + expected_origin_hostnames = CONF.console.allowed_origins + expected_origin_hostnames.append(expected_origin_hostname) + origin_url = self.headers.get('Origin') + # missing origin header indicates non-browser client which is OK + if origin_url is not None: + origin = urlparse.urlparse(origin_url) + origin_hostname = origin.hostname + origin_scheme = origin.scheme + if origin_hostname == '' or origin_scheme == '': + detail = _("Origin header not valid.") + raise exception.ValidationError(detail=detail) + if origin_hostname not in expected_origin_hostnames: + detail = _("Origin header does not match this host.") + raise exception.ValidationError(detail=detail) + if not self.verify_origin_proto(connect_info, origin_scheme): + detail = _("Origin header protocol does not match this host.") + raise exception.ValidationError(detail=detail) + + self.msg(_('connect info: %s'), str(connect_info)) + host = connect_info['host'] + port = int(connect_info['port']) + + # Connect to the target + self.msg(_("connecting to: %(host)s:%(port)s") % {'host': host, + 'port': port}) + tsock = self.socket(host, port, connect=True) + + # Handshake as necessary + if connect_info.get('internal_access_path'): + tsock.send("CONNECT %s HTTP/1.1\r\n\r\n" % + connect_info['internal_access_path']) + end_token = "\r\n\r\n" + while True: + data = tsock.recv(4096, socket.MSG_PEEK) + token_loc = data.find(end_token) + if token_loc != -1: + if data.split("\r\n")[0].find("200") == -1: + raise 
exception.InvalidConnectionInfo() + # remove the response from recv buffer + tsock.recv(token_loc + len(end_token)) + break + + # Start proxying + try: + self.do_proxy(tsock) + except Exception: + if tsock: + tsock.shutdown(socket.SHUT_RDWR) + tsock.close() + self.vmsg(_("%(host)s:%(port)s: " + "Websocket client or target closed") % + {'host': host, 'port': port}) + raise + + +class NovaProxyRequestHandler(NovaProxyRequestHandlerBase, + websockify.ProxyRequestHandler): + def __init__(self, *args, **kwargs): + websockify.ProxyRequestHandler.__init__(self, *args, **kwargs) + + def socket(self, *args, **kwargs): + return websockify.WebSocketServer.socket(*args, **kwargs) + + +class NovaWebSocketProxy(websockify.WebSocketProxy): + @staticmethod + def get_logger(): + return LOG diff --git a/nova/consoleauth/manager.py b/nova/consoleauth/manager.py new file mode 100644 index 00000000..52d4529e --- /dev/null +++ b/nova/consoleauth/manager.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python +# Copyright (c) 2012 OpenStack Foundation +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Auth Components for Consoles.""" + +import time + +from oslo_log import log as logging +import oslo_messaging as messaging +from oslo_serialization import jsonutils + +from nova import cache_utils +from nova.cells import rpcapi as cells_rpcapi +from nova.compute import rpcapi as compute_rpcapi +import nova.conf +from nova import context as nova_context +from nova import manager +from nova import objects + + +LOG = logging.getLogger(__name__) + +CONF = nova.conf.CONF + + +class ConsoleAuthManager(manager.Manager): + """Manages token based authentication.""" + + target = messaging.Target(version='2.1') + + def __init__(self, scheduler_driver=None, *args, **kwargs): + super(ConsoleAuthManager, self).__init__(service_name='consoleauth', + *args, **kwargs) + self._mc = None + self._mc_instance = None + self.compute_rpcapi = compute_rpcapi.ComputeAPI() + self.cells_rpcapi = cells_rpcapi.CellsAPI() + + @property + def mc(self): + if self._mc is None: + self._mc = cache_utils.get_client(CONF.consoleauth.token_ttl) + return self._mc + + @property + def mc_instance(self): + if self._mc_instance is None: + self._mc_instance = cache_utils.get_client() + return self._mc_instance + + def reset(self): + LOG.info('Reloading compute RPC API') + compute_rpcapi.LAST_VERSION = None + self.compute_rpcapi = compute_rpcapi.ComputeAPI() + + def _get_tokens_for_instance(self, instance_uuid): + tokens_str = self.mc_instance.get(instance_uuid.encode('UTF-8')) + if not tokens_str: + tokens = [] + else: + tokens = jsonutils.loads(tokens_str) + return tokens + + def authorize_console(self, context, token, console_type, host, port, + internal_access_path, instance_uuid, + access_url=None): + + token_dict = {'token': token, + 'instance_uuid': instance_uuid, + 'console_type': console_type, + 'host': host, + 'port': port, + 'internal_access_path': internal_access_path, + 'access_url': access_url, + 'last_activity_at': time.time()} + data 
= jsonutils.dumps(token_dict) + + self.mc.set(token.encode('UTF-8'), data) + tokens = self._get_tokens_for_instance(instance_uuid) + + # Remove the expired tokens from cache. + token_values = self.mc.get_multi( + [tok.encode('UTF-8') for tok in tokens]) + tokens = [name for name, value in zip(tokens, token_values) + if value is not None] + tokens.append(token) + + self.mc_instance.set(instance_uuid.encode('UTF-8'), + jsonutils.dumps(tokens)) + + LOG.info("Received Token: %(token)s, %(token_dict)s", + {'token': token, 'token_dict': token_dict}) + + def _validate_token(self, context, token): + instance_uuid = token['instance_uuid'] + if instance_uuid is None: + return False + + # NOTE(comstud): consoleauth was meant to run in API cells. So, + # if cells is enabled, we must call down to the child cell for + # the instance. + if CONF.cells.enable: + return self.cells_rpcapi.validate_console_port(context, + instance_uuid, token['port'], token['console_type']) + + mapping = objects.InstanceMapping.get_by_instance_uuid(context, + instance_uuid) + with nova_context.target_cell(context, mapping.cell_mapping) as cctxt: + instance = objects.Instance.get_by_uuid(cctxt, instance_uuid) + + return self.compute_rpcapi.validate_console_port( + cctxt, + instance, + token['port'], + token['console_type']) + + def check_token(self, context, token): + token_str = self.mc.get(token.encode('UTF-8')) + token_valid = (token_str is not None) + LOG.info("Checking Token: %(token)s, %(token_valid)s", + {'token': token, 'token_valid': token_valid}) + if token_valid: + token = jsonutils.loads(token_str) + if self._validate_token(context, token): + return token + + def delete_tokens_for_instance(self, context, instance_uuid): + tokens = self._get_tokens_for_instance(instance_uuid) + self.mc.delete_multi( + [tok.encode('UTF-8') for tok in tokens]) + self.mc_instance.delete(instance_uuid.encode('UTF-8')) diff --git a/nova/network/neutronv2/api.py b/nova/network/neutronv2/api.py new file mode 100644 index 00000000..0a1162d1 --- /dev/null +++ b/nova/network/neutronv2/api.py @@ -0,0 +1,2583 @@ +# Copyright 2012 OpenStack Foundation +# All Rights Reserved +# Copyright (c) 2012 NEC Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + +import time + +from keystoneauth1 import loading as ks_loading +from neutronclient.common import exceptions as neutron_client_exc +from neutronclient.v2_0 import client as clientv20 +from oslo_log import log as logging +from oslo_utils import excutils +from oslo_utils import uuidutils +import six + +from nova.compute import utils as compute_utils +import nova.conf +from nova import exception +from nova.i18n import _ +from nova.network import base_api +from nova.network import model as network_model +from nova.network.neutronv2 import constants +from nova import objects +from nova.objects import fields as obj_fields +from nova.pci import manager as pci_manager +from nova.pci import request as pci_request +from nova.pci import utils as pci_utils +from nova.pci import whitelist as pci_whitelist +from nova.policies import servers as servers_policies +from nova import profiler +from nova import service_auth + +CONF = nova.conf.CONF + +LOG = logging.getLogger(__name__) + +_SESSION = None +_ADMIN_AUTH = None + +DEFAULT_SECGROUP = 'default' +BINDING_PROFILE = 'binding:profile' +BINDING_HOST_ID = 'binding:host_id' +MIGRATING_ATTR = 'migrating_to' + + +def reset_state(): + global _ADMIN_AUTH + global _SESSION + + _ADMIN_AUTH = None + _SESSION = None + + +def _load_auth_plugin(conf): + auth_plugin = ks_loading.load_auth_from_conf_options(conf, + nova.conf.neutron.NEUTRON_GROUP) + + if auth_plugin: + return auth_plugin + + err_msg = _('Unknown auth type: %s') % conf.neutron.auth_type + raise neutron_client_exc.Unauthorized(message=err_msg) + + +def _get_binding_profile(port): + """Convenience method to get the binding:profile from the port + + The binding:profile in the port is undefined in the networking service + API and is dependent on backend configuration. This means it could be + an empty dict, None, or have some values. + + :param port: dict port response body from the networking service API + :returns: The port binding:profile dict; empty if not set on the port + """ + return port.get(BINDING_PROFILE, {}) or {} + + +@profiler.trace_cls("neutron_api") +class ClientWrapper(clientv20.Client): + """A Neutron client wrapper class. + + Wraps the callable methods, catches Unauthorized,Forbidden from Neutron and + convert it to a 401,403 for Nova clients. + """ + def __init__(self, base_client, admin): + # Expose all attributes from the base_client instance + self.__dict__ = base_client.__dict__ + self.base_client = base_client + self.admin = admin + + def __getattribute__(self, name): + obj = object.__getattribute__(self, name) + if callable(obj): + obj = object.__getattribute__(self, 'proxy')(obj) + return obj + + def proxy(self, obj): + def wrapper(*args, **kwargs): + try: + ret = obj(*args, **kwargs) + except neutron_client_exc.Unauthorized: + if not self.admin: + # Token is expired so Neutron is raising a + # unauthorized exception, we should convert it to + # raise a 401 to make client to handle a retry by + # regenerating a valid token and trying a new + # attempt. + raise exception.Unauthorized() + # In admin context if token is invalid Neutron client + # should be able to regenerate a valid by using the + # Neutron admin credential configuration located in + # nova.conf. 
+ LOG.error("Neutron client was not able to generate a " + "valid admin token, please verify Neutron " + "admin credential located in nova.conf") + raise exception.NeutronAdminCredentialConfigurationInvalid() + except neutron_client_exc.Forbidden as e: + raise exception.Forbidden(e) + return ret + return wrapper + + +def get_client(context, admin=False): + # NOTE(dprince): In the case where no auth_token is present we allow use of + # neutron admin tenant credentials if it is an admin context. This is to + # support some services (metadata API) where an admin context is used + # without an auth token. + global _ADMIN_AUTH + global _SESSION + + auth_plugin = None + + if not _SESSION: + _SESSION = ks_loading.load_session_from_conf_options( + CONF, nova.conf.neutron.NEUTRON_GROUP) + + if admin or (context.is_admin and not context.auth_token): + if not _ADMIN_AUTH: + _ADMIN_AUTH = _load_auth_plugin(CONF) + auth_plugin = _ADMIN_AUTH + + elif context.auth_token: + auth_plugin = service_auth.get_auth_plugin(context) + + if not auth_plugin: + # We did not get a user token and we should not be using + # an admin token so log an error + raise exception.Unauthorized() + + return ClientWrapper( + clientv20.Client(session=_SESSION, + auth=auth_plugin, + endpoint_override=CONF.neutron.url, + region_name=CONF.neutron.region_name, + global_request_id=context.global_id), + admin=admin or context.is_admin) + + +def _is_not_duplicate(item, items, items_list_name, instance): + present = item in items + + # The expectation from this function's perspective is that the + # item is not part of the items list so if it is part of it + # we should at least log it as a warning + if present: + LOG.warning("%(item)s already exists in list: %(list_name)s " + "containing: %(items)s. ignoring it", + {'item': item, + 'list_name': items_list_name, + 'items': items}, + instance=instance) + + return not present + + +def _ensure_no_port_binding_failure(port): + binding_vif_type = port.get('binding:vif_type') + if binding_vif_type == network_model.VIF_TYPE_BINDING_FAILED: + raise exception.PortBindingFailed(port_id=port['id']) + + +def _filter_hypervisor_macs(instance, requested_ports_dict, hypervisor_macs): + """Removes macs from set if used by existing ports + + :param instance: The server instance. + :type instance: nova.objects.instance.Instance + :param requested_ports_dict: dict, keyed by port ID, of ports requested by + the user + :type requested_ports_dict: dict + :param hypervisor_macs: None or a set of MAC addresses that the + instance should use. hypervisor_macs are supplied by the hypervisor + driver (contrast with requested_networks which is user supplied). + NB: NeutronV2 currently assigns hypervisor supplied MAC addresses + to arbitrary networks, which requires openflow switches to + function correctly if more than one network is being used with + the bare metal hypervisor (which is the only one known to limit + MAC addresses). + :type hypervisor_macs: set + :returns a set of available MAC addresses to use if + creating a port later; this is the set of hypervisor_macs + after removing any MAC addresses from explicitly + requested ports. + """ + if not hypervisor_macs: + return None + + # Make a copy we can mutate: records macs that have not been used + # to create a port on a network. If we find a mac with a + # pre-allocated port we also remove it from this set. 
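
A condensed, standalone version of `_filter_hypervisor_macs()` showing what is left over for ports Nova creates later; the MAC addresses and port ID are invented and the logging is dropped.

```python
def filter_hypervisor_macs(requested_ports, hypervisor_macs):
    """Drop any hypervisor-supplied MAC that a pre-existing requested port
    already uses; what remains is available for ports created on the fly."""
    if not hypervisor_macs:
        return None
    available = set(hypervisor_macs)
    for port in requested_ports.values():
        available.discard(port['mac_address'])
    return available


hypervisor_macs = {'52:54:00:aa:bb:01', '52:54:00:aa:bb:02'}
requested_ports = {'port-1': {'mac_address': '52:54:00:aa:bb:01'}}

print(filter_hypervisor_macs(requested_ports, hypervisor_macs))
# {'52:54:00:aa:bb:02'}  - the only MAC still free for a newly created port
```
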
+ available_macs = set(hypervisor_macs) + if not requested_ports_dict: + return available_macs + + for port in requested_ports_dict.values(): + mac = port['mac_address'] + if mac not in hypervisor_macs: + LOG.debug("Port %(port)s mac address %(mac)s is " + "not in the set of hypervisor macs: " + "%(hyper_macs)s. Nova will overwrite " + "this with a new mac address.", + {'port': port['id'], + 'mac': mac, + 'hyper_macs': hypervisor_macs}, + instance=instance) + else: + # Don't try to use this MAC if we need to create a + # port on the fly later. Identical MACs may be + # configured by users into multiple ports so we + # discard rather than popping. + available_macs.discard(mac) + + return available_macs + + +class API(base_api.NetworkAPI): + """API for interacting with the neutron 2.x API.""" + + def __init__(self): + super(API, self).__init__() + self.last_neutron_extension_sync = None + self.extensions = {} + self.pci_whitelist = pci_whitelist.Whitelist( + CONF.pci.passthrough_whitelist) + + def _update_port_with_migration_profile( + self, instance, port_id, port_profile, admin_client): + try: + updated_port = admin_client.update_port( + port_id, {'port': {BINDING_PROFILE: port_profile}}) + return updated_port + except Exception as ex: + with excutils.save_and_reraise_exception(): + LOG.error("Unable to update binding profile " + "for port: %(port)s due to failure: %(error)s", + {'port': port_id, 'error': ex}, + instance=instance) + + def _clear_migration_port_profile( + self, context, instance, admin_client, ports): + for p in ports: + # If the port already has a migration profile and if + # it is to be torn down, then we need to clean up + # the migration profile. + port_profile = _get_binding_profile(p) + if not port_profile: + continue + if MIGRATING_ATTR in port_profile: + del port_profile[MIGRATING_ATTR] + LOG.debug("Removing port %s migration profile", p['id'], + instance=instance) + self._update_port_with_migration_profile( + instance, p['id'], port_profile, admin_client) + + def _setup_migration_port_profile( + self, context, instance, host, admin_client, ports): + # Migrating to a new host + for p in ports: + # If the host hasn't changed, there is nothing to do. + # But if the destination host is different than the + # current one, please update the port_profile with + # the 'migrating_to'(MIGRATING_ATTR) key pointing to + # the given 'host'. + host_id = p.get(BINDING_HOST_ID) + if host_id != host: + port_profile = _get_binding_profile(p) + port_profile[MIGRATING_ATTR] = host + self._update_port_with_migration_profile( + instance, p['id'], port_profile, admin_client) + LOG.debug("Port %(port_id)s updated with migration " + "profile %(profile_data)s successfully", + {'port_id': p['id'], + 'profile_data': port_profile}, + instance=instance) + + def setup_networks_on_host(self, context, instance, host=None, + teardown=False): + """Setup or teardown the network structures.""" + # Check if the instance is migrating to a new host. + port_migrating = host and (instance.host != host) + # If the port is migrating to a new host or if it is a + # teardown on the original host, then proceed. + if port_migrating or teardown: + search_opts = {'device_id': instance.uuid, + 'tenant_id': instance.project_id, + BINDING_HOST_ID: instance.host} + # Now get the port details to process the ports + # binding profile info. 
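
For reference, a standalone sketch of the `update_port()` bodies that `_setup_migration_port_profile()` and `_clear_migration_port_profile()` above end up sending; the existing profile contents and destination host are invented, and in the patch these bodies go through the admin neutron client.

```python
BINDING_PROFILE = 'binding:profile'
MIGRATING_ATTR = 'migrating_to'

# Profile as returned for the port today (contents depend on the backend).
port_profile = {'pci_slot': '0000:0a:00.1'}

# Migration setup: tag the profile with the destination host.
setup_profile = dict(port_profile)
setup_profile[MIGRATING_ATTR] = 'dest-compute-01'
setup_body = {'port': {BINDING_PROFILE: setup_profile}}
# admin_client.update_port(port_id, setup_body)

# Teardown: remove the key again and write the profile back.
teardown_profile = {k: v for k, v in setup_profile.items()
                    if k != MIGRATING_ATTR}
teardown_body = {'port': {BINDING_PROFILE: teardown_profile}}

print(setup_body)
print(teardown_body)
```
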
+ data = self.list_ports(context, **search_opts) + ports = data['ports'] + admin_client = get_client(context, admin=True) + if teardown: + # Reset the port profile + self._clear_migration_port_profile( + context, instance, admin_client, ports) + elif port_migrating: + # Setup the port profile + self._setup_migration_port_profile( + context, instance, host, admin_client, ports) + + def _get_available_networks(self, context, project_id, + net_ids=None, neutron=None, + auto_allocate=False): + """Return a network list available for the tenant. + The list contains networks owned by the tenant and public networks. + If net_ids specified, it searches networks with requested IDs only. + """ + if not neutron: + neutron = get_client(context) + + if net_ids: + # If user has specified to attach instance only to specific + # networks then only add these to **search_opts. This search will + # also include 'shared' networks. + search_opts = {'id': net_ids} + nets = neutron.list_networks(**search_opts).get('networks', []) + else: + # (1) Retrieve non-public network list owned by the tenant. + search_opts = {'tenant_id': project_id, 'shared': False} + if auto_allocate: + # The auto-allocated-topology extension may create complex + # network topologies and it does so in a non-transactional + # fashion. Therefore API users may be exposed to resources that + # are transient or partially built. A client should use + # resources that are meant to be ready and this can be done by + # checking their admin_state_up flag. + search_opts['admin_state_up'] = True + nets = neutron.list_networks(**search_opts).get('networks', []) + # (2) Retrieve public network list. + search_opts = {'shared': True} + nets += neutron.list_networks(**search_opts).get('networks', []) + + _ensure_requested_network_ordering( + lambda x: x['id'], + nets, + net_ids) + + return nets + + def _create_port_minimal(self, port_client, instance, network_id, + fixed_ip=None, security_group_ids=None): + """Attempts to create a port for the instance on the given network. + + :param port_client: The client to use to create the port. + :param instance: Create the port for the given instance. + :param network_id: Create the port on the given network. + :param fixed_ip: Optional fixed IP to use from the given network. + :param security_group_ids: Optional list of security group IDs to + apply to the port. + :returns: The created port. + :raises PortLimitExceeded: If neutron fails with an OverQuota error. + :raises NoMoreFixedIps: If neutron fails with + IpAddressGenerationFailure error. + :raises: PortBindingFailed: If port binding failed. 
+ """ + # Set the device_id so it's clear who this port was created for, + # and to stop other instances trying to use it + port_req_body = {'port': {'device_id': instance.uuid}} + try: + if fixed_ip: + port_req_body['port']['fixed_ips'] = [ + {'ip_address': str(fixed_ip)}] + port_req_body['port']['network_id'] = network_id + port_req_body['port']['admin_state_up'] = True + port_req_body['port']['tenant_id'] = instance.project_id + if security_group_ids: + port_req_body['port']['security_groups'] = security_group_ids + + port_response = port_client.create_port(port_req_body) + + port = port_response['port'] + port_id = port['id'] + try: + _ensure_no_port_binding_failure(port) + except exception.PortBindingFailed: + with excutils.save_and_reraise_exception(): + port_client.delete_port(port_id) + + LOG.debug('Successfully created port: %s', port_id, + instance=instance) + return port + except neutron_client_exc.InvalidIpForNetworkClient: + LOG.warning('Neutron error: %(ip)s is not a valid IP address ' + 'for network %(network_id)s.', + {'ip': fixed_ip, 'network_id': network_id}, + instance=instance) + msg = (_('Fixed IP %(ip)s is not a valid ip address for ' + 'network %(network_id)s.') % + {'ip': fixed_ip, 'network_id': network_id}) + raise exception.InvalidInput(reason=msg) + except neutron_client_exc.IpAddressInUseClient: + LOG.warning('Neutron error: Fixed IP %s is ' + 'already in use.', fixed_ip, instance=instance) + msg = _("Fixed IP %s is already in use.") % fixed_ip + raise exception.FixedIpAlreadyInUse(message=msg) + except neutron_client_exc.OverQuotaClient: + LOG.warning( + 'Neutron error: Port quota exceeded in tenant: %s', + port_req_body['port']['tenant_id'], instance=instance) + raise exception.PortLimitExceeded() + except neutron_client_exc.IpAddressGenerationFailureClient: + LOG.warning('Neutron error: No more fixed IPs in network: %s', + network_id, instance=instance) + raise exception.NoMoreFixedIps(net=network_id) + except neutron_client_exc.NeutronClientException: + with excutils.save_and_reraise_exception(): + LOG.exception('Neutron error creating port on network %s', + network_id, instance=instance) + + def _update_port(self, port_client, instance, port_id, + port_req_body): + try: + port_response = port_client.update_port(port_id, port_req_body) + port = port_response['port'] + _ensure_no_port_binding_failure(port) + LOG.debug('Successfully updated port: %s', port_id, + instance=instance) + return port + except neutron_client_exc.MacAddressInUseClient: + mac_address = port_req_body['port'].get('mac_address') + network_id = port_req_body['port'].get('network_id') + LOG.warning('Neutron error: MAC address %(mac)s is already ' + 'in use on network %(network)s.', + {'mac': mac_address, 'network': network_id}, + instance=instance) + raise exception.PortInUse(port_id=mac_address) + except neutron_client_exc.HostNotCompatibleWithFixedIpsClient: + network_id = port_req_body['port'].get('network_id') + LOG.warning('Neutron error: Tried to bind a port with ' + 'fixed_ips to a host in the wrong segment on ' + 'network %(network)s.', + {'network': network_id}, instance=instance) + raise exception.FixedIpInvalidOnHost(port_id=port_id) + + @staticmethod + def _populate_mac_address(instance, port_req_body, available_macs): + # NOTE(johngarbutt) On port_update, this will cause us to override + # any previous mac address the port may have had. 
+ if available_macs is not None: + if not available_macs: + raise exception.PortNotFree( + instance=instance.uuid) + mac_address = available_macs.pop() + port_req_body['port']['mac_address'] = mac_address + return mac_address + + def _check_external_network_attach(self, context, nets): + """Check if attaching to external network is permitted.""" + if not context.can(servers_policies.NETWORK_ATTACH_EXTERNAL, + fatal=False): + for net in nets: + # Perform this check here rather than in validate_networks to + # ensure the check is performed every time + # allocate_for_instance is invoked + if net.get('router:external') and not net.get('shared'): + raise exception.ExternalNetworkAttachForbidden( + network_uuid=net['id']) + + def _unbind_ports(self, context, ports, + neutron, port_client=None): + """Unbind the given ports by clearing their device_id and + device_owner. + + :param context: The request context. + :param ports: list of port IDs. + :param neutron: neutron client for the current context. + :param port_client: The client with appropriate karma for + updating the ports. + """ + if port_client is None: + # Requires admin creds to set port bindings + port_client = get_client(context, admin=True) + for port_id in ports: + # A port_id is optional in the NetworkRequest object so check here + # in case the caller forgot to filter the list. + if port_id is None: + continue + port_req_body = {'port': {'device_id': '', 'device_owner': ''}} + port_req_body['port'][BINDING_HOST_ID] = None + port_req_body['port'][BINDING_PROFILE] = {} + if self._has_dns_extension(): + port_req_body['port']['dns_name'] = '' + try: + port_client.update_port(port_id, port_req_body) + except neutron_client_exc.PortNotFoundClient: + LOG.debug('Unable to unbind port %s as it no longer exists.', + port_id) + except Exception: + LOG.exception("Unable to clear device ID for port '%s'", + port_id) + + def _validate_requested_port_ids(self, context, instance, neutron, + requested_networks): + """Processes and validates requested networks for allocation. + + Iterates over the list of NetworkRequest objects, validating the + request and building sets of ports and networks to + use for allocating ports for the instance. + + :param context: The user request context. + :type context: nova.context.RequestContext + :param instance: allocate networks on this instance + :type instance: nova.objects.Instance + :param neutron: neutron client session + :type neutron: neutronclient.v2_0.client.Client + :param requested_networks: List of user-requested networks and/or ports + :type requested_networks: nova.objects.NetworkRequestList + :returns: tuple of: + - ports: dict mapping of port id to port dict + - ordered_networks: list of nova.objects.NetworkRequest objects + for requested networks (either via explicit network request + or the network for an explicit port request) + :raises nova.exception.PortNotFound: If a requested port is not found + in Neutron. + :raises nova.exception.PortNotUsable: If a requested port is not owned + by the same tenant that the instance is created under. + :raises nova.exception.PortInUse: If a requested port is already + attached to another instance. + :raises nova.exception.PortNotUsableDNS: If a requested port has a + value assigned to its dns_name attribute. + """ + ports = {} + ordered_networks = [] + # If we're asked to auto-allocate the network then there won't be any + # ports or real neutron networks to lookup, so just return empty + # results. 
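
A standalone sketch of the two outputs described in the docstring above, with invented IDs, plain dicts standing in for NetworkRequest objects, and the per-port ownership, in-use, and dns_name checks elided.

```python
# 'shown_ports' stands in for what neutron's show_port returns for each
# pre-existing port named in the request.
shown_ports = {
    'port-aaa': {'id': 'port-aaa', 'network_id': 'net-1',
                 'tenant_id': 'proj-1', 'device_id': ''},
}
requests = [
    {'port_id': 'port-aaa', 'network_id': None},   # pre-existing port
    {'port_id': None, 'network_id': 'net-2'},      # plain network request
]

ports = {}
ordered_networks = []
for request in requests:
    if request['port_id']:
        port = shown_ports[request['port_id']]
        # A port request implicitly requests the port's network as well.
        request['network_id'] = port['network_id']
        ports[request['port_id']] = port
    if request['network_id']:
        ordered_networks.append(request)

print(sorted(ports))       # ['port-aaa']
print(ordered_networks)    # both requests, each now carrying a network_id
```
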
+ if requested_networks and not requested_networks.auto_allocate: + for request in requested_networks: + + # Process a request to use a pre-existing neutron port. + if request.port_id: + # Make sure the port exists. + port = self._show_port(context, request.port_id, + neutron_client=neutron) + # Make sure the instance has access to the port. + if port['tenant_id'] != instance.project_id: + raise exception.PortNotUsable(port_id=request.port_id, + instance=instance.uuid) + + # Make sure the port isn't already attached to another + # instance. + if port.get('device_id'): + raise exception.PortInUse(port_id=request.port_id) + + # Make sure that if the user assigned a value to the port's + # dns_name attribute, it is equal to the instance's + # hostname + if port.get('dns_name'): + if port['dns_name'] != instance.hostname: + raise exception.PortNotUsableDNS( + port_id=request.port_id, + instance=instance.uuid, value=port['dns_name'], + hostname=instance.hostname) + + # Make sure the port is usable + _ensure_no_port_binding_failure(port) + + # If requesting a specific port, automatically process + # the network for that port as if it were explicitly + # requested. + request.network_id = port['network_id'] + ports[request.port_id] = port + + # Process a request to use a specific neutron network. + if request.network_id: + ordered_networks.append(request) + + return ports, ordered_networks + + def _clean_security_groups(self, security_groups): + """Cleans security groups requested from Nova API + + Neutron already passes a 'default' security group when + creating ports so it's not necessary to specify it to the + request. + """ + if not security_groups: + security_groups = [] + elif security_groups == [DEFAULT_SECGROUP]: + security_groups = [] + return security_groups + + def _process_security_groups(self, instance, neutron, security_groups): + """Processes and validates requested security groups for allocation. + + Iterates over the list of requested security groups, validating the + request and filtering out the list of security group IDs to use for + port allocation. + + :param instance: allocate networks on this instance + :type instance: nova.objects.Instance + :param neutron: neutron client session + :type neutron: neutronclient.v2_0.client.Client + :param security_groups: list of requested security group name or IDs + to use when allocating new ports for the instance + :return: list of security group IDs to use when allocating new ports + :raises nova.exception.NoUniqueMatch: If multiple security groups + are requested with the same name. + :raises nova.exception.SecurityGroupNotFound: If a requested security + group is not in the tenant-filtered list of available security + groups in Neutron. + """ + security_group_ids = [] + # TODO(arosen) Should optimize more to do direct query for security + # group if len(security_groups) == 1 + if len(security_groups): + search_opts = {'tenant_id': instance.project_id} + user_security_groups = neutron.list_security_groups( + **search_opts).get('security_groups') + + for security_group in security_groups: + name_match = None + uuid_match = None + for user_security_group in user_security_groups: + if user_security_group['name'] == security_group: + # If there was a name match in a previous iteration + # of the loop, we have a conflict. + if name_match: + raise exception.NoUniqueMatch( + _("Multiple security groups found matching" + " '%s'. 
Use an ID to be more specific.") % + security_group) + + name_match = user_security_group['id'] + + if user_security_group['id'] == security_group: + uuid_match = user_security_group['id'] + + # If a user names the security group the same as + # another's security groups uuid, the name takes priority. + if name_match: + security_group_ids.append(name_match) + elif uuid_match: + security_group_ids.append(uuid_match) + else: + raise exception.SecurityGroupNotFound( + security_group_id=security_group) + + return security_group_ids + + def _validate_requested_network_ids(self, context, instance, neutron, + requested_networks, ordered_networks): + """Check requested networks using the Neutron API. + + Check the user has access to the network they requested, and that + it is a suitable network to connect to. This includes getting the + network details for any ports that have been passed in, because the + request will have been updated with the network_id in + _validate_requested_port_ids. + + If the user has not requested any ports or any networks, we get back + a full list of networks the user has access to, and if there is only + one network, we update ordered_networks so we will connect the + instance to that network. + + :param context: The request context. + :param instance: nova.objects.instance.Instance object. + :param neutron: neutron client + :param requested_networks: nova.objects.NetworkRequestList, list of + user-requested networks and/or ports; may be empty + :param ordered_networks: output from _validate_requested_port_ids + that will be used to create and update ports + :returns: dict, keyed by network ID, of networks to use + :raises InterfaceAttachFailedNoNetwork: If no specific networks were + requested and none are available. + :raises NetworkAmbiguous: If no specific networks were requested but + more than one is available. + :raises ExternalNetworkAttachForbidden: If the policy rules forbid + the request context from using an external non-shared network but + one was requested (or available). + """ + + # Get networks from Neutron + # If net_ids is empty, this actually returns all available nets + auto_allocate = requested_networks and requested_networks.auto_allocate + net_ids = [request.network_id for request in ordered_networks] + nets = self._get_available_networks(context, instance.project_id, + net_ids, neutron=neutron, + auto_allocate=auto_allocate) + if not nets: + + if requested_networks: + # There are no networks available for the project to use and + # none specifically requested, so check to see if we're asked + # to auto-allocate the network. + if auto_allocate: + # During validate_networks we checked to see if + # auto-allocation is available so we don't need to do that + # again here. + nets = [self._auto_allocate_network(instance, neutron)] + else: + # NOTE(chaochin): If user specifies a network id and the + # network can not be found, raise NetworkNotFound error. 
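+ # Walk the requests in order and report the first network-only
+ # request that could not be resolved; requests that named a
+ # port were already validated in _validate_requested_port_ids.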
+ for request in requested_networks: + if not request.port_id and request.network_id: + raise exception.NetworkNotFound( + network_id=request.network_id) + else: + # no requested nets and user has no available nets + return {} + + # if this function is directly called without a requested_network param + # or if it is indirectly called through allocate_port_for_instance() + # with None params=(network_id=None, requested_ip=None, port_id=None, + # pci_request_id=None): + if (not requested_networks + or requested_networks.is_single_unspecified + or requested_networks.auto_allocate): + # If no networks were requested and none are available, consider + # it a bad request. + if not nets: + raise exception.InterfaceAttachFailedNoNetwork( + project_id=instance.project_id) + # bug/1267723 - if no network is requested and more + # than one is available then raise NetworkAmbiguous Exception + if len(nets) > 1: + msg = _("Multiple possible networks found, use a Network " + "ID to be more specific.") + raise exception.NetworkAmbiguous(msg) + ordered_networks.append( + objects.NetworkRequest(network_id=nets[0]['id'])) + + # NOTE(melwitt): check external net attach permission after the + # check for ambiguity, there could be another + # available net which is permitted bug/1364344 + self._check_external_network_attach(context, nets) + + return {net['id']: net for net in nets} + + def _create_ports_for_instance(self, context, instance, ordered_networks, + nets, neutron, security_group_ids): + """Create port for network_requests that don't have a port_id + + :param context: The request context. + :param instance: nova.objects.instance.Instance object. + :param ordered_networks: objects.NetworkRequestList in requested order + :param nets: a dict of network_id to networks returned from neutron + :param neutron: neutronclient built from users request context + :param security_group_ids: a list of security group IDs to be applied + to any ports created + :returns a list of pairs (NetworkRequest, created_port_uuid); note that + created_port_uuid will be None for the pair where a pre-existing + port was part of the user request + """ + created_port_ids = [] + requests_and_created_ports = [] + for request in ordered_networks: + network = nets.get(request.network_id) + # if network_id did not pass validate_networks() and not available + # here then skip it safely not continuing with a None Network + if not network: + continue + + try: + port_security_enabled = network.get( + 'port_security_enabled', True) + if port_security_enabled: + if not network.get('subnets'): + # Neutron can't apply security groups to a port + # for a network without L3 assignments. + LOG.debug('Network with port security enabled does ' + 'not have subnets so security groups ' + 'cannot be applied: %s', + network, instance=instance) + raise exception.SecurityGroupCannotBeApplied() + else: + if security_group_ids: + # We don't want to apply security groups on port + # for a network defined with + # 'port_security_enabled=False'. 
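+ # The requested security group IDs cannot be honoured on a
+ # network that explicitly disabled port security, so fail the
+ # build instead of silently dropping the user's request.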
+ LOG.debug('Network has port security disabled so ' + 'security groups cannot be applied: %s', + network, instance=instance) + raise exception.SecurityGroupCannotBeApplied() + + created_port_id = None + if not request.port_id: + # create minimal port, if port not already created by user + created_port = self._create_port_minimal( + neutron, instance, request.network_id, + request.address, security_group_ids) + created_port_id = created_port['id'] + created_port_ids.append(created_port_id) + + requests_and_created_ports.append(( + request, created_port_id)) + + except Exception: + with excutils.save_and_reraise_exception(): + if created_port_ids: + self._delete_ports( + neutron, instance, created_port_ids) + + return requests_and_created_ports + + def allocate_for_instance(self, context, instance, vpn, + requested_networks, macs=None, + security_groups=None, bind_host_id=None): + """Allocate network resources for the instance. + + :param context: The request context. + :param instance: nova.objects.instance.Instance object. + :param vpn: A boolean, ignored by this driver. + :param requested_networks: objects.NetworkRequestList object. + :param macs: None or a set of MAC addresses that the instance + should use. macs is supplied by the hypervisor driver (contrast + with requested_networks which is user supplied). + NB: NeutronV2 currently assigns hypervisor supplied MAC addresses + to arbitrary networks, which requires openflow switches to + function correctly if more than one network is being used with + the bare metal hypervisor (which is the only one known to limit + MAC addresses). + :param security_groups: None or security groups to allocate for + instance. + :param bind_host_id: the host ID to attach to the ports being created. + :returns: network info as from get_instance_nw_info() + """ + LOG.debug('allocate_for_instance()', instance=instance) + if not instance.project_id: + msg = _('empty project id for instance %s') + raise exception.InvalidInput( + reason=msg % instance.uuid) + + # We do not want to create a new neutron session for each call + neutron = get_client(context) + + # + # Validate ports and networks with neutron. The requested_ports_dict + # variable is a dict, keyed by port ID, of ports that were on the user + # request and may be empty. The ordered_networks variable is a list of + # NetworkRequest objects for any networks or ports specifically + # requested by the user, which again may be empty. + # + requested_ports_dict, ordered_networks = ( + self._validate_requested_port_ids( + context, instance, neutron, requested_networks)) + + nets = self._validate_requested_network_ids( + context, instance, neutron, requested_networks, ordered_networks) + if not nets: + LOG.debug("No network configured", instance=instance) + return network_model.NetworkInfo([]) + + # Validate requested security groups + security_groups = self._clean_security_groups(security_groups) + security_group_ids = self._process_security_groups( + instance, neutron, security_groups) + + # Create ports from the list of ordered_networks. The returned + # requests_and_created_ports variable is a list of 2-item tuples of + # the form (NetworkRequest, created_port_id). Note that a tuple pair + # will have None for the created_port_id if the NetworkRequest already + # contains a port_id, meaning the user requested a specific + # pre-existing port so one wasn't created here. The ports will be + # updated later in _update_ports_for_instance to be bound to the + # instance and compute host. 
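+ # Illustrative shape of requests_and_created_ports (IDs are
+ # hypothetical):
+ #   [(NetworkRequest(network_id='net-1'), 'new-port-uuid'),
+ #    (NetworkRequest(port_id='user-port-uuid'), None)]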
+ requests_and_created_ports = self._create_ports_for_instance( + context, instance, ordered_networks, nets, neutron, + security_group_ids) + + # + # Update existing and newly created ports + # + available_macs = _filter_hypervisor_macs( + instance, requested_ports_dict, macs) + + # We always need admin_client to build nw_info, + # we sometimes need it when updating ports + admin_client = get_client(context, admin=True) + + ordered_nets, ordered_port_ids, preexisting_port_ids, \ + created_port_ids = self._update_ports_for_instance( + context, instance, + neutron, admin_client, requests_and_created_ports, nets, + bind_host_id, available_macs) + + # + # Perform a full update of the network_info_cache, + # including re-fetching lots of the required data from neutron + # + nw_info = self.get_instance_nw_info( + context, instance, networks=ordered_nets, + port_ids=ordered_port_ids, + admin_client=admin_client, + preexisting_port_ids=preexisting_port_ids, + update_cells=True) + # Only return info about ports we processed in this run, which might + # have been pre-existing neutron ports or ones that nova created. In + # the initial allocation case (server create), this will be everything + # we processed, and in later runs will only be what was processed that + # time. For example, if the instance was created with port A and + # then port B was attached in this call, only port B would be returned. + # Thus, this filtering only affects the attach case. + return network_model.NetworkInfo([vif for vif in nw_info + if vif['id'] in created_port_ids + + preexisting_port_ids]) + + def _update_ports_for_instance(self, context, instance, neutron, + admin_client, requests_and_created_ports, nets, + bind_host_id, available_macs): + """Update ports from network_requests. + + Updates the pre-existing ports and the ones created in + ``_create_ports_for_instance`` with ``device_id``, ``device_owner``, + optionally ``mac_address`` and, depending on the + loaded extensions, ``rxtx_factor``, ``binding:host_id``, ``dns_name``. + + :param context: The request context. + :param instance: nova.objects.instance.Instance object. + :param neutron: client using user context + :param admin_client: client using admin context + :param requests_and_created_ports: [(NetworkRequest, created_port_id)]; + Note that created_port_id will be None for any user-requested + pre-existing port. + :param nets: a dict of network_id to networks returned from neutron + :param bind_host_id: a string for port['binding:host_id'] + :param available_macs: a list of available mac addresses + :returns: tuple with the following:: + + * list of network dicts in their requested order + * list of port IDs in their requested order - note that does not + mean the port was requested by the user, it could be a port + created on a network requested by the user + * list of pre-existing port IDs requested by the user + * list of created port IDs + """ + + # We currently require admin creds to set port bindings. 
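+ # Alias the admin client for the binding updates below; reads
+ # and the dns_name update keep using the user-context client.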
+ port_client = admin_client + + preexisting_port_ids = [] + created_port_ids = [] + ports_in_requested_order = [] + nets_in_requested_order = [] + created_vifs = [] # this list is for cleanups if we fail + for request, created_port_id in requests_and_created_ports: + vifobj = objects.VirtualInterface(context) + vifobj.instance_uuid = instance.uuid + vifobj.tag = request.tag if 'tag' in request else None + + network = nets.get(request.network_id) + # if network_id did not pass validate_networks() and not available + # here then skip it safely not continuing with a None Network + if not network: + continue + + nets_in_requested_order.append(network) + + zone = 'compute:%s' % instance.availability_zone + port_req_body = {'port': {'device_id': instance.uuid, + 'device_owner': zone}} + try: + self._populate_neutron_extension_values( + context, instance, request.pci_request_id, port_req_body, + network=network, neutron=neutron, + bind_host_id=bind_host_id) + self._populate_pci_mac_address(instance, + request.pci_request_id, port_req_body) + self._populate_mac_address( + instance, port_req_body, available_macs) + + if created_port_id: + port_id = created_port_id + created_port_ids.append(port_id) + else: + port_id = request.port_id + ports_in_requested_order.append(port_id) + + # After port is created, update other bits + updated_port = self._update_port( + port_client, instance, port_id, port_req_body) + + # NOTE(danms): The virtual_interfaces table enforces global + # uniqueness on MAC addresses, which clearly does not match + # with neutron's view of the world. Since address is a 255-char + # string we can namespace it with our port id. Using '/' should + # be safely excluded from MAC address notations as well as + # UUIDs. We could stop doing this when we remove + # nova-network, but we'd need to leave the read translation in + # for longer than that of course. 
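+ # e.g. (hypothetical values):
+ #   'fa:16:3e:11:22:33/2f0e2eed-559b-4c05-8b52-d15a54e7a04f'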
+ vifobj.address = '%s/%s' % (updated_port['mac_address'], + updated_port['id']) + vifobj.uuid = port_id + vifobj.create() + created_vifs.append(vifobj) + + if not created_port_id: + # only add if update worked and port create not called + preexisting_port_ids.append(port_id) + + self._update_port_dns_name(context, instance, network, + ports_in_requested_order[-1], + neutron) + except Exception: + with excutils.save_and_reraise_exception(): + self._unbind_ports(context, + preexisting_port_ids, + neutron, port_client) + self._delete_ports(neutron, instance, created_port_ids) + for vif in created_vifs: + vif.destroy() + + return (nets_in_requested_order, ports_in_requested_order, + preexisting_port_ids, created_port_ids) + + def _refresh_neutron_extensions_cache(self, context, neutron=None): + """Refresh the neutron extensions cache when necessary.""" + if (not self.last_neutron_extension_sync or + ((time.time() - self.last_neutron_extension_sync) + >= CONF.neutron.extension_sync_interval)): + if neutron is None: + neutron = get_client(context) + extensions_list = neutron.list_extensions()['extensions'] + self.last_neutron_extension_sync = time.time() + self.extensions.clear() + self.extensions = {ext['name']: ext for ext in extensions_list} + + def _has_multi_provider_extension(self, context, neutron=None): + self._refresh_neutron_extensions_cache(context, neutron=neutron) + return constants.MULTI_NET_EXT in self.extensions + + def _has_dns_extension(self): + return constants.DNS_INTEGRATION in self.extensions + + def _has_qos_queue_extension(self, context, neutron=None): + self._refresh_neutron_extensions_cache(context, neutron=neutron) + return constants.QOS_QUEUE in self.extensions + + def _get_pci_device_profile(self, pci_dev): + dev_spec = self.pci_whitelist.get_devspec(pci_dev) + if dev_spec: + return {'pci_vendor_info': "%s:%s" % + (pci_dev.vendor_id, pci_dev.product_id), + 'pci_slot': pci_dev.address, + 'physical_network': + dev_spec.get_tags().get('physical_network')} + raise exception.PciDeviceNotFound(node_id=pci_dev.compute_node_id, + address=pci_dev.address) + + def _populate_neutron_binding_profile(self, instance, pci_request_id, + port_req_body): + """Populate neutron binding:profile. + + Populate it with SR-IOV related information + """ + if pci_request_id: + pci_dev = pci_manager.get_instance_pci_devs( + instance, pci_request_id).pop() + profile = self._get_pci_device_profile(pci_dev) + port_req_body['port'][BINDING_PROFILE] = profile + + @staticmethod + def _populate_pci_mac_address(instance, pci_request_id, port_req_body): + """Add the updated MAC address value to the update_port request body. + + Currently this is done only for PF passthrough. + """ + if pci_request_id is not None: + pci_devs = pci_manager.get_instance_pci_devs( + instance, pci_request_id) + if len(pci_devs) != 1: + # NOTE(ndipanov): We shouldn't ever get here since + # InstancePCIRequest instances built from network requests + # only ever index a single device, which needs to be + # successfully claimed for this to be called as part of + # allocate_networks method + LOG.error("PCI request %s does not have a " + "unique device associated with it. 
Unable to " + "determine MAC address", + pci_request, instance=instance) + return + pci_dev = pci_devs[0] + if pci_dev.dev_type == obj_fields.PciDeviceType.SRIOV_PF: + try: + mac = pci_utils.get_mac_by_pci_address(pci_dev.address) + except exception.PciDeviceNotFoundById as e: + LOG.error( + "Could not determine MAC address for %(addr)s, " + "error: %(e)s", + {"addr": pci_dev.address, "e": e}, instance=instance) + else: + port_req_body['port']['mac_address'] = mac + + def _populate_neutron_extension_values(self, context, instance, + pci_request_id, port_req_body, + network=None, neutron=None, + bind_host_id=None): + """Populate neutron extension values for the instance. + + If the extensions loaded contain QOS_QUEUE then pass the rxtx_factor. + """ + if self._has_qos_queue_extension(context, neutron=neutron): + flavor = instance.get_flavor() + rxtx_factor = flavor.get('rxtx_factor') + port_req_body['port']['rxtx_factor'] = rxtx_factor + port_req_body['port'][BINDING_HOST_ID] = bind_host_id + self._populate_neutron_binding_profile(instance, + pci_request_id, + port_req_body) + + if self._has_dns_extension(): + # If the DNS integration extension is enabled in Neutron, most + # ports will get their dns_name attribute set in the port create or + # update requests in allocate_for_instance. So we just add the + # dns_name attribute to the payload of those requests. The + # exception is when the port binding extension is enabled in + # Neutron and the port is on a network that has a non-blank + # dns_domain attribute. This case requires to be processed by + # method _update_port_dns_name + if (not network.get('dns_domain')): + port_req_body['port']['dns_name'] = instance.hostname + + def _update_port_dns_name(self, context, instance, network, port_id, + neutron): + """Update an instance port dns_name attribute with instance.hostname. + + The dns_name attribute of a port on a network with a non-blank + dns_domain attribute will be sent to the external DNS service + (Designate) if DNS integration is enabled in Neutron. This requires the + assignment of the dns_name to the port to be done with a Neutron client + using the user's context. allocate_for_instance uses a port with admin + context if the port binding extensions is enabled in Neutron. In this + case, we assign in this method the dns_name attribute to the port with + an additional update request. Only a very small fraction of ports will + require this additional update request. 
+ """ + if self._has_dns_extension() and network.get('dns_domain'): + try: + port_req_body = {'port': {'dns_name': instance.hostname}} + neutron.update_port(port_id, port_req_body) + except neutron_client_exc.BadRequest: + LOG.warning('Neutron error: Instance hostname ' + '%(hostname)s is not a valid DNS name', + {'hostname': instance.hostname}, instance=instance) + msg = (_('Instance hostname %(hostname)s is not a valid DNS ' + 'name') % {'hostname': instance.hostname}) + raise exception.InvalidInput(reason=msg) + + def _delete_ports(self, neutron, instance, ports, raise_if_fail=False): + exceptions = [] + for port in ports: + try: + neutron.delete_port(port) + except neutron_client_exc.NeutronClientException as e: + if e.status_code == 404: + LOG.warning("Port %s does not exist", port, + instance=instance) + else: + exceptions.append(e) + LOG.warning("Failed to delete port %s for instance.", + port, instance=instance, exc_info=True) + if len(exceptions) > 0 and raise_if_fail: + raise exceptions[0] + + def deallocate_for_instance(self, context, instance, **kwargs): + """Deallocate all network resources related to the instance.""" + LOG.debug('deallocate_for_instance()', instance=instance) + search_opts = {'device_id': instance.uuid} + neutron = get_client(context) + data = neutron.list_ports(**search_opts) + ports = [port['id'] for port in data.get('ports', [])] + + requested_networks = kwargs.get('requested_networks') or [] + # NOTE(danms): Temporary and transitional + if isinstance(requested_networks, objects.NetworkRequestList): + requested_networks = requested_networks.as_tuples() + ports_to_skip = set([port_id for nets, fips, port_id, pci_request_id + in requested_networks]) + # NOTE(boden): requested_networks only passed in when deallocating + # from a failed build / spawn call. Therefore we need to include + # preexisting ports when deallocating from a standard delete op + # in which case requested_networks is not provided. + ports_to_skip |= set(self._get_preexisting_port_ids(instance)) + ports = set(ports) - ports_to_skip + + # Reset device_id and device_owner for the ports that are skipped + self._unbind_ports(context, ports_to_skip, neutron) + # Delete the rest of the ports + self._delete_ports(neutron, instance, ports, raise_if_fail=True) + + # deallocate vifs (mac addresses) + objects.VirtualInterface.delete_by_instance_uuid( + context, instance.uuid) + + # NOTE(arosen): This clears out the network_cache only if the instance + # hasn't already been deleted. This is needed when an instance fails to + # launch and is rescheduled onto another compute node. If the instance + # has already been deleted this call does nothing. + base_api.update_instance_cache_with_nw_info(self, context, instance, + network_model.NetworkInfo([])) + + def allocate_port_for_instance(self, context, instance, port_id, + network_id=None, requested_ip=None, + bind_host_id=None, tag=None): + """Allocate a port for the instance.""" + requested_networks = objects.NetworkRequestList( + objects=[objects.NetworkRequest(network_id=network_id, + address=requested_ip, + port_id=port_id, + pci_request_id=None, + tag=tag)]) + return self.allocate_for_instance(context, instance, vpn=False, + requested_networks=requested_networks, + bind_host_id=bind_host_id) + + def deallocate_port_for_instance(self, context, instance, port_id): + """Remove a specified port from the instance. 
+ + Return network information for the instance + """ + neutron = get_client(context) + preexisting_ports = self._get_preexisting_port_ids(instance) + if port_id in preexisting_ports: + self._unbind_ports(context, [port_id], neutron) + else: + self._delete_ports(neutron, instance, [port_id], + raise_if_fail=True) + + # Delete the VirtualInterface for the given port_id. + vif = objects.VirtualInterface.get_by_uuid(context, port_id) + if vif: + if 'tag' in vif and vif.tag: + self._delete_nic_metadata(instance, vif) + vif.destroy() + else: + LOG.debug('VirtualInterface not found for port: %s', + port_id, instance=instance) + + return self.get_instance_nw_info(context, instance) + + def _delete_nic_metadata(self, instance, vif): + for device in instance.device_metadata.devices: + if (isinstance(device, objects.NetworkInterfaceMetadata) + and device.mac == vif.address): + instance.device_metadata.devices.remove(device) + instance.save() + break + + def list_ports(self, context, **search_opts): + """List ports for the client based on search options.""" + return get_client(context).list_ports(**search_opts) + + def show_port(self, context, port_id): + """Return the port for the client given the port id. + + :param context: Request context. + :param port_id: The id of port to be queried. + :returns: A dict containing port data keyed by 'port', e.g. + + :: + + {'port': {'port_id': 'abcd', + 'fixed_ip_address': '1.2.3.4'}} + """ + return dict(port=self._show_port(context, port_id)) + + def _show_port(self, context, port_id, neutron_client=None, fields=None): + """Return the port for the client given the port id. + + :param context: Request context. + :param port_id: The id of port to be queried. + :param neutron_client: A neutron client. + :param fields: The condition fields to query port data. + :returns: A dict of port data. + e.g. {'port_id': 'abcd', 'fixed_ip_address': '1.2.3.4'} + """ + if not neutron_client: + neutron_client = get_client(context) + try: + if fields: + result = neutron_client.show_port(port_id, fields=fields) + else: + result = neutron_client.show_port(port_id) + return result.get('port') + except neutron_client_exc.PortNotFoundClient: + raise exception.PortNotFound(port_id=port_id) + except neutron_client_exc.Unauthorized: + raise exception.Forbidden() + except neutron_client_exc.NeutronClientException as exc: + msg = (_("Failed to access port %(port_id)s: %(reason)s") % + {'port_id': port_id, 'reason': exc}) + raise exception.NovaException(message=msg) + + def _get_instance_nw_info(self, context, instance, networks=None, + port_ids=None, admin_client=None, + preexisting_port_ids=None, **kwargs): + # NOTE(danms): This is an inner method intended to be called + # by other code that updates instance nwinfo. It *must* be + # called with the refresh_cache-%(instance_uuid) lock held! + LOG.debug('_get_instance_nw_info()', instance=instance) + # Ensure that we have an up to date copy of the instance info cache. + # Otherwise multiple requests could collide and cause cache + # corruption. 
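+ # Re-reading the cache under the lock is what prevents two
+ # concurrent refreshes from overwriting each other's result
+ # with stale data.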
+ compute_utils.refresh_info_cache_for_instance(context, instance) + nw_info = self._build_network_info_model(context, instance, networks, + port_ids, admin_client, + preexisting_port_ids) + return network_model.NetworkInfo.hydrate(nw_info) + + def _gather_port_ids_and_networks(self, context, instance, networks=None, + port_ids=None, neutron=None): + """Return an instance's complete list of port_ids and networks.""" + + if ((networks is None and port_ids is not None) or + (port_ids is None and networks is not None)): + message = _("This method needs to be called with either " + "networks=None and port_ids=None or port_ids and " + "networks as not none.") + raise exception.NovaException(message=message) + + ifaces = instance.get_network_info() + # This code path is only done when refreshing the network_cache + if port_ids is None: + port_ids = [iface['id'] for iface in ifaces] + net_ids = [iface['network']['id'] for iface in ifaces] + + if networks is None: + networks = self._get_available_networks(context, + instance.project_id, + net_ids, neutron) + # an interface was added/removed from instance. + else: + + # Prepare the network ids list for validation purposes + networks_ids = [network['id'] for network in networks] + + # Validate that interface networks doesn't exist in networks. + # Though this issue can and should be solved in methods + # that prepare the networks list, this method should have this + # ignore-duplicate-networks/port-ids mechanism to reduce the + # probability of failing to boot the VM. + networks = networks + [ + {'id': iface['network']['id'], + 'name': iface['network']['label'], + 'tenant_id': iface['network']['meta']['tenant_id']} + for iface in ifaces + if _is_not_duplicate(iface['network']['id'], + networks_ids, + "networks", + instance)] + + # Include existing interfaces so they are not removed from the db. 
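+ # (For example, attaching port B to an instance that already
+ # has port A cached: port_ids arrives as ['B'] and the cached
+ # interface 'A' is prepended below; values are hypothetical.)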
+ # Validate that the interface id is not in the port_ids + port_ids = [iface['id'] for iface in ifaces + if _is_not_duplicate(iface['id'], + port_ids, + "port_ids", + instance)] + port_ids + + return networks, port_ids + + @base_api.refresh_cache + def add_fixed_ip_to_instance(self, context, instance, network_id): + """Add a fixed IP to the instance from specified network.""" + neutron = get_client(context) + search_opts = {'network_id': network_id} + data = neutron.list_subnets(**search_opts) + ipam_subnets = data.get('subnets', []) + if not ipam_subnets: + raise exception.NetworkNotFoundForInstance( + instance_id=instance.uuid) + + zone = 'compute:%s' % instance.availability_zone + search_opts = {'device_id': instance.uuid, + 'device_owner': zone, + 'network_id': network_id} + data = neutron.list_ports(**search_opts) + ports = data['ports'] + for p in ports: + for subnet in ipam_subnets: + fixed_ips = p['fixed_ips'] + fixed_ips.append({'subnet_id': subnet['id']}) + port_req_body = {'port': {'fixed_ips': fixed_ips}} + try: + neutron.update_port(p['id'], port_req_body) + return self._get_instance_nw_info(context, instance) + except Exception as ex: + msg = ("Unable to update port %(portid)s on subnet " + "%(subnet_id)s with failure: %(exception)s") + LOG.debug(msg, {'portid': p['id'], + 'subnet_id': subnet['id'], + 'exception': ex}, instance=instance) + + raise exception.NetworkNotFoundForInstance( + instance_id=instance.uuid) + + @base_api.refresh_cache + def remove_fixed_ip_from_instance(self, context, instance, address): + """Remove a fixed IP from the instance.""" + neutron = get_client(context) + zone = 'compute:%s' % instance.availability_zone + search_opts = {'device_id': instance.uuid, + 'device_owner': zone, + 'fixed_ips': 'ip_address=%s' % address} + data = neutron.list_ports(**search_opts) + ports = data['ports'] + for p in ports: + fixed_ips = p['fixed_ips'] + new_fixed_ips = [] + for fixed_ip in fixed_ips: + if fixed_ip['ip_address'] != address: + new_fixed_ips.append(fixed_ip) + port_req_body = {'port': {'fixed_ips': new_fixed_ips}} + try: + neutron.update_port(p['id'], port_req_body) + except Exception as ex: + msg = ("Unable to update port %(portid)s with" + " failure: %(exception)s") + LOG.debug(msg, {'portid': p['id'], 'exception': ex}, + instance=instance) + return self._get_instance_nw_info(context, instance) + + raise exception.FixedIpNotFoundForSpecificInstance( + instance_uuid=instance.uuid, ip=address) + + def _get_phynet_info(self, context, neutron, net_id): + phynet_name = None + if self._has_multi_provider_extension(context, neutron=neutron): + network = neutron.show_network(net_id, + fields='segments').get('network') + segments = network.get('segments', {}) + for net in segments: + # NOTE(vladikr): In general, "multi-segments" network is a + # combination of L2 segments. The current implementation + # contains a vxlan and vlan(s) segments, where only a vlan + # network will have a physical_network specified, but may + # change in the future. The purpose of this method + # is to find a first segment that provides a physical network. + # TODO(vladikr): Additional work will be required to handle the + # case of multiple vlan segments associated with different + # physical networks. + phynet_name = net.get('provider:physical_network') + if phynet_name: + return phynet_name + # Raising here as at least one segment should + # have a physical network provided. 
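+ # Reaching this point means the loop above finished without
+ # finding a provider:physical_network on any segment.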
+ if segments: + msg = (_("None of the segments of network %s provides a " + "physical_network") % net_id) + raise exception.NovaException(message=msg) + + net = neutron.show_network(net_id, + fields='provider:physical_network').get('network') + phynet_name = net.get('provider:physical_network') + return phynet_name + + def _get_port_vnic_info(self, context, neutron, port_id): + """Retrieve port vnic info + + Invoked with a valid port_id. + Return vnic type and the attached physical network name. + """ + phynet_name = None + port = self._show_port(context, port_id, neutron_client=neutron, + fields=['binding:vnic_type', 'network_id']) + vnic_type = port.get('binding:vnic_type', + network_model.VNIC_TYPE_NORMAL) + if vnic_type in network_model.VNIC_TYPES_SRIOV: + net_id = port['network_id'] + phynet_name = self._get_phynet_info(context, neutron, net_id) + return vnic_type, phynet_name + + def create_pci_requests_for_sriov_ports(self, context, pci_requests, + requested_networks): + """Check requested networks for any SR-IOV port request. + + Create a PCI request object for each SR-IOV port, and add it to the + pci_requests object that contains a list of PCI request object. + """ + if not requested_networks or requested_networks.no_allocate: + return + + neutron = get_client(context, admin=True) + for request_net in requested_networks: + phynet_name = None + vnic_type = network_model.VNIC_TYPE_NORMAL + + if request_net.port_id: + vnic_type, phynet_name = self._get_port_vnic_info( + context, neutron, request_net.port_id) + pci_request_id = None + if vnic_type in network_model.VNIC_TYPES_SRIOV: + # TODO(moshele): To differentiate between the SR-IOV legacy + # and SR-IOV ovs hardware offload we will leverage the nic + # feature based scheduling in nova. This mean we will need + # libvirt to expose the nic feature. At the moment + # there is a limitation that deployers cannot use both + # SR-IOV modes (legacy and ovs) in the same deployment. + spec = {pci_request.PCI_NET_TAG: phynet_name} + dev_type = pci_request.DEVICE_TYPE_FOR_VNIC_TYPE.get(vnic_type) + if dev_type: + spec[pci_request.PCI_DEVICE_TYPE_TAG] = dev_type + request = objects.InstancePCIRequest( + count=1, + spec=[spec], + request_id=uuidutils.generate_uuid()) + pci_requests.requests.append(request) + pci_request_id = request.request_id + + # Add pci_request_id into the requested network + request_net.pci_request_id = pci_request_id + + def _can_auto_allocate_network(self, context, neutron): + """Helper method to determine if we can auto-allocate networks + + :param context: nova request context + :param neutron: neutron client + :returns: True if it's possible to auto-allocate networks, False + otherwise. + """ + # run the dry-run validation, which will raise a 409 if not ready + try: + neutron.validate_auto_allocated_topology_requirements( + context.project_id) + LOG.debug('Network auto-allocation is available for project ' + '%s', context.project_id) + return True + except neutron_client_exc.Conflict as ex: + LOG.debug('Unable to auto-allocate networks. %s', + six.text_type(ex)) + return False + + def _auto_allocate_network(self, instance, neutron): + """Automatically allocates a network for the given project. + + :param instance: create the network for the project that owns this + instance + :param neutron: neutron client + :returns: Details of the network that was created. 
+ :raises: nova.exception.UnableToAutoAllocateNetwork + :raises: nova.exception.NetworkNotFound + """ + project_id = instance.project_id + LOG.debug('Automatically allocating a network for project %s.', + project_id, instance=instance) + try: + topology = neutron.get_auto_allocated_topology( + project_id)['auto_allocated_topology'] + except neutron_client_exc.Conflict: + raise exception.UnableToAutoAllocateNetwork(project_id=project_id) + + try: + network = neutron.show_network(topology['id'])['network'] + except neutron_client_exc.NetworkNotFoundClient: + # This shouldn't happen since we just created the network, but + # handle it anyway. + LOG.error('Automatically allocated network %(network_id)s ' + 'was not found.', {'network_id': topology['id']}, + instance=instance) + raise exception.UnableToAutoAllocateNetwork(project_id=project_id) + + LOG.debug('Automatically allocated network: %s', network, + instance=instance) + return network + + def _ports_needed_per_instance(self, context, neutron, requested_networks): + + # TODO(danms): Remove me when all callers pass an object + if requested_networks and isinstance(requested_networks[0], tuple): + requested_networks = objects.NetworkRequestList.from_tuples( + requested_networks) + + ports_needed_per_instance = 0 + if (requested_networks is None or len(requested_networks) == 0 or + requested_networks.auto_allocate): + nets = self._get_available_networks(context, context.project_id, + neutron=neutron) + if len(nets) > 1: + # Attaching to more than one network by default doesn't + # make sense, as the order will be arbitrary and the guest OS + # won't know which to configure + msg = _("Multiple possible networks found, use a Network " + "ID to be more specific.") + raise exception.NetworkAmbiguous(msg) + + if not nets and ( + requested_networks and requested_networks.auto_allocate): + # If there are no networks available to this project and we + # were asked to auto-allocate a network, check to see that we + # can do that first. + LOG.debug('No networks are available for project %s; checking ' + 'to see if we can automatically allocate a network.', + context.project_id) + if not self._can_auto_allocate_network(context, neutron): + raise exception.UnableToAutoAllocateNetwork( + project_id=context.project_id) + + ports_needed_per_instance = 1 + else: + net_ids_requested = [] + for request in requested_networks: + if request.port_id: + port = self._show_port(context, request.port_id, + neutron_client=neutron) + if port.get('device_id', None): + raise exception.PortInUse(port_id=request.port_id) + deferred_ip = port.get('ip_allocation') == 'deferred' + # NOTE(carl_baldwin) A deferred IP port doesn't have an + # address here. If it fails to get one later when nova + # updates it with host info, Neutron will error which + # raises an exception. + if not deferred_ip and not port.get('fixed_ips'): + raise exception.PortRequiresFixedIP( + port_id=request.port_id) + request.network_id = port['network_id'] + else: + ports_needed_per_instance += 1 + net_ids_requested.append(request.network_id) + + # NOTE(jecarey) There is currently a race condition. + # That is, if you have more than one request for a specific + # fixed IP at the same time then only one will be allocated + # the ip. The fixed IP will be allocated to only one of the + # instances that will run. The second instance will fail on + # spawn. That instance will go into error state. 
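+ # (Concretely: two concurrent boots can both pass the duplicate
+ # check below, but only one port ends up with the address and
+ # the other instance errors at spawn.)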
+ # TODO(jecarey) Need to address this race condition once we + # have the ability to update mac addresses in Neutron. + if request.address: + # TODO(jecarey) Need to look at consolidating list_port + # calls once able to OR filters. + search_opts = {'network_id': request.network_id, + 'fixed_ips': 'ip_address=%s' % ( + request.address), + 'fields': 'device_id'} + existing_ports = neutron.list_ports( + **search_opts)['ports'] + if existing_ports: + i_uuid = existing_ports[0]['device_id'] + raise exception.FixedIpAlreadyInUse( + address=request.address, + instance_uuid=i_uuid) + + # Now check to see if all requested networks exist + if net_ids_requested: + nets = self._get_available_networks( + context, context.project_id, net_ids_requested, + neutron=neutron) + + for net in nets: + if not net.get('subnets'): + raise exception.NetworkRequiresSubnet( + network_uuid=net['id']) + + if len(nets) != len(net_ids_requested): + requested_netid_set = set(net_ids_requested) + returned_netid_set = set([net['id'] for net in nets]) + lostid_set = requested_netid_set - returned_netid_set + if lostid_set: + id_str = '' + for _id in lostid_set: + id_str = id_str and id_str + ', ' + _id or _id + raise exception.NetworkNotFound(network_id=id_str) + return ports_needed_per_instance + + def validate_networks(self, context, requested_networks, num_instances): + """Validate that the tenant can use the requested networks. + + Return the number of instances than can be successfully allocated + with the requested network configuration. + """ + LOG.debug('validate_networks() for %s', requested_networks) + + neutron = get_client(context) + ports_needed_per_instance = self._ports_needed_per_instance( + context, neutron, requested_networks) + + # Note(PhilD): Ideally Nova would create all required ports as part of + # network validation, but port creation requires some details + # from the hypervisor. So we just check the quota and return + # how many of the requested number of instances can be created + if ports_needed_per_instance: + quotas = neutron.show_quota(context.project_id)['quota'] + if quotas.get('port', -1) == -1: + # Unlimited Port Quota + return num_instances + + # We only need the port count so only ask for ids back. + params = dict(tenant_id=context.project_id, fields=['id']) + ports = neutron.list_ports(**params)['ports'] + free_ports = quotas.get('port') - len(ports) + if free_ports < 0: + msg = (_("The number of defined ports: %(ports)d " + "is over the limit: %(quota)d") % + {'ports': len(ports), + 'quota': quotas.get('port')}) + raise exception.PortLimitExceeded(msg) + ports_needed = ports_needed_per_instance * num_instances + if free_ports >= ports_needed: + return num_instances + else: + return free_ports // ports_needed_per_instance + return num_instances + + def _get_instance_uuids_by_ip(self, context, address): + """Retrieve instance uuids associated with the given IP address. + + :returns: A list of dicts containing the uuids keyed by 'instance_uuid' + e.g. [{'instance_uuid': uuid}, ...] 
+ """ + search_opts = {"fixed_ips": 'ip_address=%s' % address} + data = get_client(context).list_ports(**search_opts) + ports = data.get('ports', []) + return [{'instance_uuid': port['device_id']} for port in ports + if port['device_id']] + + def _get_port_id_by_fixed_address(self, client, + instance, address): + """Return port_id from a fixed address.""" + zone = 'compute:%s' % instance.availability_zone + search_opts = {'device_id': instance.uuid, + 'device_owner': zone} + data = client.list_ports(**search_opts) + ports = data['ports'] + port_id = None + for p in ports: + for ip in p['fixed_ips']: + if ip['ip_address'] == address: + port_id = p['id'] + break + if not port_id: + raise exception.FixedIpNotFoundForAddress(address=address) + return port_id + + @base_api.refresh_cache + def associate_floating_ip(self, context, instance, + floating_address, fixed_address, + affect_auto_assigned=False): + """Associate a floating IP with a fixed IP.""" + + # Note(amotoki): 'affect_auto_assigned' is not respected + # since it is not used anywhere in nova code and I could + # find why this parameter exists. + + client = get_client(context) + port_id = self._get_port_id_by_fixed_address(client, instance, + fixed_address) + fip = self._get_floating_ip_by_address(client, floating_address) + param = {'port_id': port_id, + 'fixed_ip_address': fixed_address} + try: + client.update_floatingip(fip['id'], {'floatingip': param}) + except neutron_client_exc.Conflict as e: + raise exception.FloatingIpAssociateFailed(six.text_type(e)) + + if fip['port_id']: + port = self._show_port(context, fip['port_id'], + neutron_client=client) + orig_instance_uuid = port['device_id'] + + msg_dict = dict(address=floating_address, + instance_id=orig_instance_uuid) + LOG.info('re-assign floating IP %(address)s from ' + 'instance %(instance_id)s', msg_dict, + instance=instance) + orig_instance = objects.Instance.get_by_uuid(context, + orig_instance_uuid) + + # purge cached nw info for the original instance + base_api.update_instance_cache_with_nw_info(self, context, + orig_instance) + + def get_all(self, context): + """Get all networks for client.""" + client = get_client(context) + networks = client.list_networks().get('networks') + network_objs = [] + for network in networks: + network_objs.append(objects.Network(context=context, + name=network['name'], + label=network['name'], + uuid=network['id'])) + return objects.NetworkList(context=context, + objects=network_objs) + + def get(self, context, network_uuid): + """Get specific network for client.""" + client = get_client(context) + try: + network = client.show_network(network_uuid).get('network') or {} + except neutron_client_exc.NetworkNotFoundClient: + raise exception.NetworkNotFound(network_id=network_uuid) + net_obj = objects.Network(context=context, + name=network['name'], + label=network['name'], + uuid=network['id']) + return net_obj + + def delete(self, context, network_uuid): + """Delete a network for client.""" + raise NotImplementedError() + + def disassociate(self, context, network_uuid): + """Disassociate a network for client.""" + raise NotImplementedError() + + def associate(self, context, network_uuid, host=base_api.SENTINEL, + project=base_api.SENTINEL): + """Associate a network for client.""" + raise NotImplementedError() + + def get_fixed_ip(self, context, id): + """Get a fixed IP from the id.""" + raise NotImplementedError() + + def get_fixed_ip_by_address(self, context, address): + """Return instance uuids given an address.""" + uuid_maps = 
self._get_instance_uuids_by_ip(context, address) + if len(uuid_maps) == 1: + return uuid_maps[0] + elif not uuid_maps: + raise exception.FixedIpNotFoundForAddress(address=address) + else: + raise exception.FixedIpAssociatedWithMultipleInstances( + address=address) + + def _setup_net_dict(self, client, network_id): + if not network_id: + return {} + pool = client.show_network(network_id)['network'] + return {pool['id']: pool} + + def _setup_port_dict(self, context, client, port_id): + if not port_id: + return {} + port = self._show_port(context, port_id, neutron_client=client) + return {port['id']: port} + + def _setup_pools_dict(self, client): + pools = self._get_floating_ip_pools(client) + return {i['id']: i for i in pools} + + def _setup_ports_dict(self, client, project_id=None): + search_opts = {'tenant_id': project_id} if project_id else {} + ports = client.list_ports(**search_opts)['ports'] + return {p['id']: p for p in ports} + + def get_floating_ip(self, context, id): + """Return floating IP object given the floating IP id.""" + client = get_client(context) + try: + fip = client.show_floatingip(id)['floatingip'] + except neutron_client_exc.NeutronClientException as e: + if e.status_code == 404: + raise exception.FloatingIpNotFound(id=id) + else: + with excutils.save_and_reraise_exception(): + LOG.exception('Unable to access floating IP %s', id) + pool_dict = self._setup_net_dict(client, + fip['floating_network_id']) + port_dict = self._setup_port_dict(context, client, fip['port_id']) + return self._make_floating_ip_obj(context, fip, pool_dict, port_dict) + + def _get_floating_ip_pools(self, client, project_id=None): + search_opts = {constants.NET_EXTERNAL: True} + if project_id: + search_opts.update({'tenant_id': project_id}) + data = client.list_networks(**search_opts) + return data['networks'] + + def get_floating_ip_pools(self, context): + """Return floating IP pool names.""" + client = get_client(context) + pools = self._get_floating_ip_pools(client) + # Note(salv-orlando): Return a list of names to be consistent with + # nova.network.api.get_floating_ip_pools + return [n['name'] or n['id'] for n in pools] + + def _make_floating_ip_obj(self, context, fip, pool_dict, port_dict): + pool = pool_dict[fip['floating_network_id']] + # NOTE(danms): Don't give these objects a context, since they're + # not lazy-loadable anyway + floating = objects.floating_ip.NeutronFloatingIP( + id=fip['id'], address=fip['floating_ip_address'], + pool=(pool['name'] or pool['id']), project_id=fip['tenant_id'], + fixed_ip_id=fip['port_id']) + # In Neutron v2 API fixed_ip_address and instance uuid + # (= device_id) are known here, so pass it as a result. 
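+ # Illustrative fip payload from Neutron (values hypothetical):
+ #   {'id': ..., 'floating_ip_address': '198.51.100.10',
+ #    'floating_network_id': ..., 'tenant_id': ...,
+ #    'fixed_ip_address': '192.0.2.5' or None,
+ #    'port_id': ... or None}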
+ if fip['fixed_ip_address']: + floating.fixed_ip = objects.FixedIP( + address=fip['fixed_ip_address']) + else: + floating.fixed_ip = None + if fip['port_id']: + instance_uuid = port_dict[fip['port_id']]['device_id'] + # NOTE(danms): This could be .refresh()d, so give it context + floating.instance = objects.Instance(context=context, + uuid=instance_uuid) + if floating.fixed_ip: + floating.fixed_ip.instance_uuid = instance_uuid + else: + floating.instance = None + return floating + + def get_floating_ip_by_address(self, context, address): + """Return a floating IP given an address.""" + client = get_client(context) + fip = self._get_floating_ip_by_address(client, address) + pool_dict = self._setup_net_dict(client, + fip['floating_network_id']) + port_dict = self._setup_port_dict(context, client, fip['port_id']) + return self._make_floating_ip_obj(context, fip, pool_dict, port_dict) + + def get_floating_ips_by_project(self, context): + client = get_client(context) + project_id = context.project_id + fips = self._safe_get_floating_ips(client, tenant_id=project_id) + if not fips: + return [] + pool_dict = self._setup_pools_dict(client) + port_dict = self._setup_ports_dict(client, project_id) + return [self._make_floating_ip_obj(context, fip, pool_dict, port_dict) + for fip in fips] + + def get_instance_id_by_floating_address(self, context, address): + """Return the instance id a floating IP's fixed IP is allocated to.""" + client = get_client(context) + fip = self._get_floating_ip_by_address(client, address) + if not fip['port_id']: + return None + + try: + port = self._show_port(context, fip['port_id'], + neutron_client=client) + except exception.PortNotFound: + # NOTE: Here is a potential race condition between _show_port() and + # _get_floating_ip_by_address(). fip['port_id'] shows a port which + # is the server instance's. At _get_floating_ip_by_address(), + # Neutron returns the list which includes the instance. Just after + # that, the deletion of the instance happens and Neutron returns + # 404 on _show_port(). + LOG.debug('The port(%s) is not found', fip['port_id']) + return None + + return port['device_id'] + + def get_vifs_by_instance(self, context, instance): + raise NotImplementedError() + + def get_vif_by_mac_address(self, context, mac_address): + raise NotImplementedError() + + def _get_floating_ip_pool_id_by_name_or_id(self, client, name_or_id): + search_opts = {constants.NET_EXTERNAL: True, 'fields': 'id'} + if uuidutils.is_uuid_like(name_or_id): + search_opts.update({'id': name_or_id}) + else: + search_opts.update({'name': name_or_id}) + data = client.list_networks(**search_opts) + nets = data['networks'] + + if len(nets) == 1: + return nets[0]['id'] + elif len(nets) == 0: + raise exception.FloatingIpPoolNotFound() + else: + msg = (_("Multiple floating IP pools matches found for name '%s'") + % name_or_id) + raise exception.NovaException(message=msg) + + def _get_default_floating_ip_pool_name(self): + """Get default pool name from config. + + TODO(stephenfin): Remove this helper function in Queens, opting to + use the [neutron] option only. + """ + if CONF.default_floating_pool != 'nova': + LOG.warning("Config option 'default_floating_pool' is set to " + "a non-default value. Falling back to this value " + "for now but this behavior will change in a " + "future release. 
You should unset this value " + "and set the '[neutron] default_floating_pool' " + "option instead.") + return CONF.default_floating_pool + + return CONF.neutron.default_floating_pool + + def allocate_floating_ip(self, context, pool=None): + """Add a floating IP to a project from a pool.""" + client = get_client(context) + pool = pool or self._get_default_floating_ip_pool_name() + pool_id = self._get_floating_ip_pool_id_by_name_or_id(client, pool) + + param = {'floatingip': {'floating_network_id': pool_id}} + try: + fip = client.create_floatingip(param) + except (neutron_client_exc.IpAddressGenerationFailureClient, + neutron_client_exc.ExternalIpAddressExhaustedClient) as e: + raise exception.NoMoreFloatingIps(six.text_type(e)) + except neutron_client_exc.OverQuotaClient as e: + raise exception.FloatingIpLimitExceeded(six.text_type(e)) + except neutron_client_exc.BadRequest as e: + raise exception.FloatingIpBadRequest(six.text_type(e)) + + return fip['floatingip']['floating_ip_address'] + + def _safe_get_floating_ips(self, client, **kwargs): + """Get floating IP gracefully handling 404 from Neutron.""" + try: + return client.list_floatingips(**kwargs)['floatingips'] + # If a neutron plugin does not implement the L3 API a 404 from + # list_floatingips will be raised. + except neutron_client_exc.NotFound: + return [] + except neutron_client_exc.NeutronClientException as e: + # bug/1513879 neutron client is currently using + # NeutronClientException when there is no L3 API + if e.status_code == 404: + return [] + with excutils.save_and_reraise_exception(): + LOG.exception('Unable to access floating IP for %s', + ', '.join(['%s %s' % (k, v) + for k, v in kwargs.items()])) + + def _get_floating_ip_by_address(self, client, address): + """Get floating IP from floating IP address.""" + if not address: + raise exception.FloatingIpNotFoundForAddress(address=address) + fips = self._safe_get_floating_ips(client, floating_ip_address=address) + if len(fips) == 0: + raise exception.FloatingIpNotFoundForAddress(address=address) + elif len(fips) > 1: + raise exception.FloatingIpMultipleFoundForAddress(address=address) + return fips[0] + + def _get_floating_ips_by_fixed_and_port(self, client, fixed_ip, port): + """Get floating IPs from fixed IP and port.""" + return self._safe_get_floating_ips(client, fixed_ip_address=fixed_ip, + port_id=port) + + def release_floating_ip(self, context, address, + affect_auto_assigned=False): + """Remove a floating IP with the given address from a project.""" + + # Note(amotoki): We cannot handle a case where multiple pools + # have overlapping IP address range. In this case we cannot use + # 'address' as a unique key. + # This is a limitation of the current nova. + + # Note(amotoki): 'affect_auto_assigned' is not respected + # since it is not used anywhere in nova code and I could + # find why this parameter exists. + + self._release_floating_ip(context, address) + + def disassociate_and_release_floating_ip(self, context, instance, + floating_ip): + """Removes (deallocates) and deletes the floating IP. + + This api call was added to allow this to be done in one operation + if using neutron. 
+ """ + + @base_api.refresh_cache + def _release_floating_ip_and_refresh_cache(self, context, instance, + floating_ip): + self._release_floating_ip(context, floating_ip['address'], + raise_if_associated=False) + if instance: + _release_floating_ip_and_refresh_cache(self, context, instance, + floating_ip) + else: + self._release_floating_ip(context, floating_ip['address'], + raise_if_associated=False) + + def _release_floating_ip(self, context, address, + raise_if_associated=True): + client = get_client(context) + fip = self._get_floating_ip_by_address(client, address) + + if raise_if_associated and fip['port_id']: + raise exception.FloatingIpAssociated(address=address) + try: + client.delete_floatingip(fip['id']) + except neutron_client_exc.NotFound: + raise exception.FloatingIpNotFoundForAddress( + address=address + ) + + @base_api.refresh_cache + def disassociate_floating_ip(self, context, instance, address, + affect_auto_assigned=False): + """Disassociate a floating IP from the instance.""" + + # Note(amotoki): 'affect_auto_assigned' is not respected + # since it is not used anywhere in nova code and I could + # find why this parameter exists. + + client = get_client(context) + fip = self._get_floating_ip_by_address(client, address) + client.update_floatingip(fip['id'], {'floatingip': {'port_id': None}}) + + def migrate_instance_start(self, context, instance, migration): + """Start to migrate the network of an instance.""" + # NOTE(wenjianhn): just pass to make migrate instance doesn't + # raise for now. + pass + + def migrate_instance_finish(self, context, instance, migration): + """Finish migrating the network of an instance.""" + self._update_port_binding_for_instance(context, instance, + migration['dest_compute'], + migration=migration) + + def add_network_to_project(self, context, project_id, network_uuid=None): + """Force add a network to the project.""" + raise NotImplementedError() + + def _nw_info_get_ips(self, client, port): + network_IPs = [] + for fixed_ip in port['fixed_ips']: + fixed = network_model.FixedIP(address=fixed_ip['ip_address']) + floats = self._get_floating_ips_by_fixed_and_port( + client, fixed_ip['ip_address'], port['id']) + for ip in floats: + fip = network_model.IP(address=ip['floating_ip_address'], + type='floating') + fixed.add_floating_ip(fip) + network_IPs.append(fixed) + return network_IPs + + def _nw_info_get_subnets(self, context, port, network_IPs, client=None): + subnets = self._get_subnets_from_port(context, port, client) + for subnet in subnets: + subnet['ips'] = [fixed_ip for fixed_ip in network_IPs + if fixed_ip.is_in_subnet(subnet)] + return subnets + + def _nw_info_build_network(self, port, networks, subnets): + network_name = None + network_mtu = None + for net in networks: + if port['network_id'] == net['id']: + network_name = net['name'] + tenant_id = net['tenant_id'] + network_mtu = net.get('mtu') + break + else: + tenant_id = port['tenant_id'] + LOG.warning("Network %(id)s not matched with the tenants " + "network! 
The ports tenant %(tenant_id)s will be " + "used.", + {'id': port['network_id'], 'tenant_id': tenant_id}) + + bridge = None + ovs_interfaceid = None + # Network model metadata + should_create_bridge = None + vif_type = port.get('binding:vif_type') + port_details = port.get('binding:vif_details', {}) + if vif_type in [network_model.VIF_TYPE_OVS, + network_model.VIF_TYPE_AGILIO_OVS]: + bridge = port_details.get(network_model.VIF_DETAILS_BRIDGE_NAME, + CONF.neutron.ovs_bridge) + ovs_interfaceid = port['id'] + elif vif_type == network_model.VIF_TYPE_BRIDGE: + bridge = port_details.get(network_model.VIF_DETAILS_BRIDGE_NAME, + "brq" + port['network_id']) + should_create_bridge = True + elif vif_type == network_model.VIF_TYPE_DVS: + # The name of the DVS port group will contain the neutron + # network id + bridge = port['network_id'] + elif (vif_type == network_model.VIF_TYPE_VHOSTUSER and + port_details.get(network_model.VIF_DETAILS_VHOSTUSER_OVS_PLUG, + False)): + bridge = port_details.get(network_model.VIF_DETAILS_BRIDGE_NAME, + CONF.neutron.ovs_bridge) + ovs_interfaceid = port['id'] + elif (vif_type == network_model.VIF_TYPE_VHOSTUSER and + port_details.get(network_model.VIF_DETAILS_VHOSTUSER_FP_PLUG, + False)): + bridge = port_details.get(network_model.VIF_DETAILS_BRIDGE_NAME, + "brq" + port['network_id']) + + # Prune the bridge name if necessary. For the DVS this is not done + # as the bridge is a '-'. + if bridge is not None and vif_type != network_model.VIF_TYPE_DVS: + bridge = bridge[:network_model.NIC_NAME_LEN] + + network = network_model.Network( + id=port['network_id'], + bridge=bridge, + injected=CONF.flat_injected, + label=network_name, + tenant_id=tenant_id, + mtu=network_mtu + ) + network['subnets'] = subnets + port_profile = _get_binding_profile(port) + if port_profile: + physical_network = port_profile.get('physical_network') + if physical_network: + network['physical_network'] = physical_network + + if should_create_bridge is not None: + network['should_create_bridge'] = should_create_bridge + return network, ovs_interfaceid + + def _get_preexisting_port_ids(self, instance): + """Retrieve the preexisting ports associated with the given instance. + These ports were not created by nova and hence should not be + deallocated upon instance deletion. + """ + net_info = instance.get_network_info() + if not net_info: + LOG.debug('Instance cache missing network info.', + instance=instance) + return [vif['id'] for vif in net_info + if vif.get('preserve_on_delete')] + + def _build_vif_model(self, context, client, current_neutron_port, + networks, preexisting_port_ids): + """Builds a ``nova.network.model.VIF`` object based on the parameters + and current state of the port in Neutron. + + :param context: Request context. + :param client: Neutron client. + :param current_neutron_port: The current state of a Neutron port + from which to build the VIF object model. + :param networks: List of dicts which represent Neutron networks + associated with the ports currently attached to a given server + instance. + :param preexisting_port_ids: List of IDs of ports attached to a + given server instance which Nova did not create and therefore + should not delete when the port is detached from the server. + :return: nova.network.model.VIF object which represents a port in the + instance network info cache. 
+ """ + vif_active = False + if (current_neutron_port['admin_state_up'] is False + or current_neutron_port['status'] == 'ACTIVE'): + vif_active = True + + network_IPs = self._nw_info_get_ips(client, + current_neutron_port) + subnets = self._nw_info_get_subnets(context, + current_neutron_port, + network_IPs, client) + + devname = "tap" + current_neutron_port['id'] + devname = devname[:network_model.NIC_NAME_LEN] + + network, ovs_interfaceid = ( + self._nw_info_build_network(current_neutron_port, + networks, subnets)) + preserve_on_delete = (current_neutron_port['id'] in + preexisting_port_ids) + + return network_model.VIF( + id=current_neutron_port['id'], + address=current_neutron_port['mac_address'], + network=network, + vnic_type=current_neutron_port.get('binding:vnic_type', + network_model.VNIC_TYPE_NORMAL), + type=current_neutron_port.get('binding:vif_type'), + profile=_get_binding_profile(current_neutron_port), + details=current_neutron_port.get('binding:vif_details'), + ovs_interfaceid=ovs_interfaceid, + devname=devname, + active=vif_active, + preserve_on_delete=preserve_on_delete) + + def _build_network_info_model(self, context, instance, networks=None, + port_ids=None, admin_client=None, + preexisting_port_ids=None): + """Return list of ordered VIFs attached to instance. + + :param context: Request context. + :param instance: Instance we are returning network info for. + :param networks: List of networks being attached to an instance. + If value is None this value will be populated + from the existing cached value. + :param port_ids: List of port_ids that are being attached to an + instance in order of attachment. If value is None + this value will be populated from the existing + cached value. + :param admin_client: A neutron client for the admin context. + :param preexisting_port_ids: List of port_ids that nova didn't + allocate and there shouldn't be deleted when + an instance is de-allocated. Supplied list will + be added to the cached list of preexisting port + IDs for this instance. + """ + + search_opts = {'tenant_id': instance.project_id, + 'device_id': instance.uuid, } + if admin_client is None: + client = get_client(context, admin=True) + else: + client = admin_client + + data = client.list_ports(**search_opts) + + current_neutron_ports = data.get('ports', []) + nw_info_refresh = networks is None and port_ids is None + networks, port_ids = self._gather_port_ids_and_networks( + context, instance, networks, port_ids, client) + nw_info = network_model.NetworkInfo() + + if preexisting_port_ids is None: + preexisting_port_ids = [] + preexisting_port_ids = set( + preexisting_port_ids + self._get_preexisting_port_ids(instance)) + + current_neutron_port_map = {} + for current_neutron_port in current_neutron_ports: + current_neutron_port_map[current_neutron_port['id']] = ( + current_neutron_port) + + for port_id in port_ids: + current_neutron_port = current_neutron_port_map.get(port_id) + if current_neutron_port: + vif = self._build_vif_model( + context, client, current_neutron_port, networks, + preexisting_port_ids) + nw_info.append(vif) + elif nw_info_refresh: + LOG.info('Port %s from network info_cache is no ' + 'longer associated with instance in Neutron. 
' + 'Removing from network info_cache.', port_id, + instance=instance) + + return nw_info + + def _get_subnets_from_port(self, context, port, client=None): + """Return the subnets for a given port.""" + + fixed_ips = port['fixed_ips'] + # No fixed_ips for the port means there is no subnet associated + # with the network the port is created on. + # Since list_subnets(id=[]) returns all subnets visible for the + # current tenant, returned subnets may contain subnets which is not + # related to the port. To avoid this, the method returns here. + if not fixed_ips: + return [] + if not client: + client = get_client(context) + search_opts = {'id': [ip['subnet_id'] for ip in fixed_ips]} + data = client.list_subnets(**search_opts) + ipam_subnets = data.get('subnets', []) + subnets = [] + + for subnet in ipam_subnets: + subnet_dict = {'cidr': subnet['cidr'], + 'gateway': network_model.IP( + address=subnet['gateway_ip'], + type='gateway'), + } + if subnet.get('ipv6_address_mode'): + subnet_dict['ipv6_address_mode'] = subnet['ipv6_address_mode'] + + # attempt to populate DHCP server field + search_opts = {'network_id': subnet['network_id'], + 'device_owner': 'network:dhcp'} + data = client.list_ports(**search_opts) + dhcp_ports = data.get('ports', []) + for p in dhcp_ports: + for ip_pair in p['fixed_ips']: + if ip_pair['subnet_id'] == subnet['id']: + subnet_dict['dhcp_server'] = ip_pair['ip_address'] + break + + subnet_object = network_model.Subnet(**subnet_dict) + for dns in subnet.get('dns_nameservers', []): + subnet_object.add_dns( + network_model.IP(address=dns, type='dns')) + + for route in subnet.get('host_routes', []): + subnet_object.add_route( + network_model.Route(cidr=route['destination'], + gateway=network_model.IP( + address=route['nexthop'], + type='gateway'))) + + subnets.append(subnet_object) + return subnets + + def get_dns_domains(self, context): + """Return a list of available dns domains. + + These can be used to create DNS entries for floating IPs. 
+ """ + raise NotImplementedError() + + def add_dns_entry(self, context, address, name, dns_type, domain): + """Create specified DNS entry for address.""" + raise NotImplementedError() + + def modify_dns_entry(self, context, name, address, domain): + """Create specified DNS entry for address.""" + raise NotImplementedError() + + def delete_dns_entry(self, context, name, domain): + """Delete the specified dns entry.""" + raise NotImplementedError() + + def delete_dns_domain(self, context, domain): + """Delete the specified dns domain.""" + raise NotImplementedError() + + def get_dns_entries_by_address(self, context, address, domain): + """Get entries for address and domain.""" + raise NotImplementedError() + + def get_dns_entries_by_name(self, context, name, domain): + """Get entries for name and domain.""" + raise NotImplementedError() + + def create_private_dns_domain(self, context, domain, availability_zone): + """Create a private DNS domain with nova availability zone.""" + raise NotImplementedError() + + def create_public_dns_domain(self, context, domain, project=None): + """Create a private DNS domain with optional nova project.""" + raise NotImplementedError() + + def setup_instance_network_on_host(self, context, instance, host): + """Setup network for specified instance on host.""" + self._update_port_binding_for_instance(context, instance, host) + + def cleanup_instance_network_on_host(self, context, instance, host): + """Cleanup network for specified instance on host.""" + pass + + def _get_pci_devices_from_migration_context(self, migration_context, + migration): + if migration and migration.get('status') == 'reverted': + # In case of revert, swap old and new devices to + # update the ports back to the original devices. + return (migration_context.new_pci_devices, + migration_context.old_pci_devices) + return (migration_context.old_pci_devices, + migration_context.new_pci_devices) + + def _get_pci_mapping_for_migration(self, context, instance, migration): + """Get the mapping between the old PCI devices and the new PCI + devices that have been allocated during this migration. The + correlation is based on PCI request ID which is unique per PCI + devices for SR-IOV ports. + + :param context: The request context. + :param instance: Get PCI mapping for this instance. + :param migration: The migration for this instance. + :Returns: dictionary of mapping {'': } + """ + migration_context = instance.migration_context + if not migration_context: + return {} + + old_pci_devices, new_pci_devices = \ + self._get_pci_devices_from_migration_context(migration_context, + migration) + if old_pci_devices and new_pci_devices: + LOG.debug("Determining PCI devices mapping using migration" + "context: old_pci_devices: %(old)s, " + "new_pci_devices: %(new)s", + {'old': [dev for dev in old_pci_devices], + 'new': [dev for dev in new_pci_devices]}) + return {old.address: new + for old in old_pci_devices + for new in new_pci_devices + if old.request_id == new.request_id} + return {} + + def _update_port_binding_for_instance(self, context, instance, host, + migration=None): + neutron = get_client(context, admin=True) + search_opts = {'device_id': instance.uuid, + 'tenant_id': instance.project_id} + data = neutron.list_ports(**search_opts) + pci_mapping = None + port_updates = [] + ports = data['ports'] + for p in ports: + updates = {} + binding_profile = _get_binding_profile(p) + + # If the host hasn't changed, like in the case of resizing to the + # same host, there is nothing to do. 
+ if p.get(BINDING_HOST_ID) != host: + updates[BINDING_HOST_ID] = host + # NOTE: Before updating the port binding make sure we + # remove the pre-migration status from the binding profile + if binding_profile.get(MIGRATING_ATTR): + del binding_profile[MIGRATING_ATTR] + updates[BINDING_PROFILE] = binding_profile + + # Update port with newly allocated PCI devices. Even if the + # resize is happening on the same host, a new PCI device can be + # allocated. Note that this only needs to happen if a migration + # is in progress such as in a resize / migrate. It is possible + # that this function is called without a migration object, such + # as in an unshelve operation. + vnic_type = p.get('binding:vnic_type') + if (vnic_type in network_model.VNIC_TYPES_SRIOV + and migration is not None): + if not pci_mapping: + pci_mapping = self._get_pci_mapping_for_migration(context, + instance, migration) + + pci_slot = binding_profile.get('pci_slot') + new_dev = pci_mapping.get(pci_slot) + if new_dev: + binding_profile.update( + self._get_pci_device_profile(new_dev)) + updates[BINDING_PROFILE] = binding_profile + else: + raise exception.PortUpdateFailed(port_id=p['id'], + reason=_("Unable to correlate PCI slot %s") % + pci_slot) + + port_updates.append((p['id'], updates)) + + # Avoid rolling back updates if we catch an error above. + # TODO(lbeliveau): Batch up the port updates in one neutron call. + for port_id, updates in port_updates: + if updates: + LOG.info("Updating port %(port)s with " + "attributes %(attributes)s", + {"port": port_id, "attributes": updates}, + instance=instance) + try: + neutron.update_port(port_id, {'port': updates}) + except Exception: + with excutils.save_and_reraise_exception(): + LOG.exception("Unable to update binding details " + "for port %s", + port_id, instance=instance) + + def update_instance_vnic_index(self, context, instance, vif, index): + """Update instance vnic index. + + When the 'VNIC index' extension is supported this method will update + the vnic index of the instance on the port. + """ + self._refresh_neutron_extensions_cache(context) + if constants.VNIC_INDEX_EXT in self.extensions: + neutron = get_client(context) + port_req_body = {'port': {'vnic_index': index}} + try: + neutron.update_port(vif['id'], port_req_body) + except Exception: + with excutils.save_and_reraise_exception(): + LOG.exception('Unable to update instance VNIC index ' + 'for port %s.', + vif['id'], instance=instance) + + +def _ensure_requested_network_ordering(accessor, unordered, preferred): + """Sort a list with respect to the preferred network ordering.""" + if preferred: + unordered.sort(key=lambda i: preferred.index(accessor(i))) diff --git a/nova/objects/aggregate.py b/nova/objects/aggregate.py new file mode 100644 index 00000000..22396ec6 --- /dev/null +++ b/nova/objects/aggregate.py @@ -0,0 +1,617 @@ +# Copyright 2013 IBM Corp. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
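[Editor's sketch, not part of the patch] The old-to-new PCI device correlation in _get_pci_mapping_for_migration() above is easy to gloss over, so here is a minimal standalone illustration of the same pairing logic. The PciDevice namedtuple, the addresses and the request ids below are hypothetical stand-ins for the real PciDevice objects.

import collections

PciDevice = collections.namedtuple('PciDevice', ['address', 'request_id'])

# devices claimed before and after the move, tagged with their PCI request id
old = [PciDevice('0000:04:00.1', 'req-a'), PciDevice('0000:04:00.2', 'req-b')]
new = [PciDevice('0000:82:00.3', 'req-a'), PciDevice('0000:82:00.4', 'req-b')]

# same pairing rule as the dict comprehension in the patch: devices sharing a
# request_id describe the same SR-IOV port before and after the migration
mapping = {o.address: n
           for o in old
           for n in new
           if o.request_id == n.request_id}

assert mapping['0000:04:00.1'].address == '0000:82:00.3'
assert mapping['0000:04:00.2'].address == '0000:82:00.4'

Pairing by request_id works because, as the docstring above notes, the request ID is unique per PCI device for SR-IOV ports, so each old device matches exactly one newly allocated device.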
+ +from oslo_db import exception as db_exc +from oslo_log import log as logging +from oslo_utils import excutils +from oslo_utils import uuidutils +import six +from sqlalchemy.orm import contains_eager +from sqlalchemy.orm import joinedload +from sqlalchemy.sql import func +from sqlalchemy.sql import text + +from nova.compute import utils as compute_utils +from nova import db +from nova.db.sqlalchemy import api as db_api +from nova.db.sqlalchemy import api_models +from nova.db.sqlalchemy import models as main_models +from nova import exception +from nova.i18n import _ +from nova import objects +from nova.objects import base +from nova.objects import fields + +LOG = logging.getLogger(__name__) + +DEPRECATED_FIELDS = ['deleted', 'deleted_at'] + + +@db_api.api_context_manager.reader +def _aggregate_get_from_db(context, aggregate_id): + query = context.session.query(api_models.Aggregate).\ + options(joinedload('_hosts')).\ + options(joinedload('_metadata')) + query = query.filter(api_models.Aggregate.id == aggregate_id) + + aggregate = query.first() + + if not aggregate: + raise exception.AggregateNotFound(aggregate_id=aggregate_id) + + return aggregate + + +@db_api.api_context_manager.reader +def _aggregate_get_from_db_by_uuid(context, aggregate_uuid): + query = context.session.query(api_models.Aggregate).\ + options(joinedload('_hosts')).\ + options(joinedload('_metadata')) + query = query.filter(api_models.Aggregate.uuid == aggregate_uuid) + + aggregate = query.first() + + if not aggregate: + raise exception.AggregateNotFound(aggregate_id=aggregate_uuid) + + return aggregate + + +def _host_add_to_db(context, aggregate_id, host): + try: + with db_api.api_context_manager.writer.using(context): + # Check to see if the aggregate exists + _aggregate_get_from_db(context, aggregate_id) + + host_ref = api_models.AggregateHost() + host_ref.update({"host": host, "aggregate_id": aggregate_id}) + host_ref.save(context.session) + return host_ref + except db_exc.DBDuplicateEntry: + raise exception.AggregateHostExists(host=host, + aggregate_id=aggregate_id) + + +def _host_delete_from_db(context, aggregate_id, host): + count = 0 + with db_api.api_context_manager.writer.using(context): + # Check to see if the aggregate exists + _aggregate_get_from_db(context, aggregate_id) + + query = context.session.query(api_models.AggregateHost) + query = query.filter(api_models.AggregateHost.aggregate_id == + aggregate_id) + count = query.filter_by(host=host).delete() + + if count == 0: + raise exception.AggregateHostNotFound(aggregate_id=aggregate_id, + host=host) + + +def _metadata_add_to_db(context, aggregate_id, metadata, max_retries=10, + set_delete=False): + all_keys = metadata.keys() + for attempt in range(max_retries): + try: + with db_api.api_context_manager.writer.using(context): + query = context.session.query(api_models.AggregateMetadata).\ + filter_by(aggregate_id=aggregate_id) + + if set_delete: + query.filter(~api_models.AggregateMetadata.key. 
+ in_(all_keys)).\ + delete(synchronize_session=False) + + already_existing_keys = set() + if all_keys: + query = query.filter( + api_models.AggregateMetadata.key.in_(all_keys)) + for meta_ref in query.all(): + key = meta_ref.key + meta_ref.update({"value": metadata[key]}) + already_existing_keys.add(key) + + new_entries = [] + for key, value in metadata.items(): + if key in already_existing_keys: + continue + new_entries.append({"key": key, + "value": value, + "aggregate_id": aggregate_id}) + if new_entries: + context.session.execute( + api_models.AggregateMetadata.__table__.insert(), + new_entries) + + return metadata + except db_exc.DBDuplicateEntry: + # a concurrent transaction has been committed, + # try again unless this was the last attempt + with excutils.save_and_reraise_exception() as ctxt: + if attempt < max_retries - 1: + ctxt.reraise = False + else: + msg = _("Add metadata failed for aggregate %(id)s " + "after %(retries)s retries") % \ + {"id": aggregate_id, "retries": max_retries} + LOG.warning(msg) + + +@db_api.api_context_manager.writer +def _metadata_delete_from_db(context, aggregate_id, key): + # Check to see if the aggregate exists + _aggregate_get_from_db(context, aggregate_id) + + query = context.session.query(api_models.AggregateMetadata) + query = query.filter(api_models.AggregateMetadata.aggregate_id == + aggregate_id) + count = query.filter_by(key=key).delete() + + if count == 0: + raise exception.AggregateMetadataNotFound( + aggregate_id=aggregate_id, metadata_key=key) + + +@db_api.api_context_manager.writer +def _aggregate_create_in_db(context, values, metadata=None): + query = context.session.query(api_models.Aggregate) + query = query.filter(api_models.Aggregate.name == values['name']) + aggregate = query.first() + + if not aggregate: + aggregate = api_models.Aggregate() + aggregate.update(values) + aggregate.save(context.session) + # We don't want these to be lazy loaded later. We know there is + # nothing here since we just created this aggregate. 
+ aggregate._hosts = [] + aggregate._metadata = [] + else: + raise exception.AggregateNameExists(aggregate_name=values['name']) + if metadata: + _metadata_add_to_db(context, aggregate.id, metadata) + context.session.expire(aggregate, ['_metadata']) + aggregate._metadata + + return aggregate + + +@db_api.api_context_manager.writer +def _aggregate_delete_from_db(context, aggregate_id): + # Delete Metadata first + context.session.query(api_models.AggregateMetadata).\ + filter_by(aggregate_id=aggregate_id).\ + delete() + + count = context.session.query(api_models.Aggregate).\ + filter(api_models.Aggregate.id == aggregate_id).\ + delete() + + if count == 0: + raise exception.AggregateNotFound(aggregate_id=aggregate_id) + + +@db_api.api_context_manager.writer +def _aggregate_update_to_db(context, aggregate_id, values): + aggregate = _aggregate_get_from_db(context, aggregate_id) + + set_delete = True + if "availability_zone" in values: + az = values.pop('availability_zone') + if 'metadata' not in values: + values['metadata'] = {'availability_zone': az} + set_delete = False + else: + values['metadata']['availability_zone'] = az + metadata = values.get('metadata') + if metadata is not None: + _metadata_add_to_db(context, aggregate_id, values.pop('metadata'), + set_delete=set_delete) + + aggregate.update(values) + try: + aggregate.save(context.session) + except db_exc.DBDuplicateEntry: + if 'name' in values: + raise exception.AggregateNameExists( + aggregate_name=values['name']) + else: + raise + return _aggregate_get_from_db(context, aggregate_id) + + +@base.NovaObjectRegistry.register +class Aggregate(base.NovaPersistentObject, base.NovaObject): + # Version 1.0: Initial version + # Version 1.1: String attributes updated to support unicode + # Version 1.2: Added uuid field + # Version 1.3: Added get_by_uuid method + VERSION = '1.3' + + fields = { + 'id': fields.IntegerField(), + 'uuid': fields.UUIDField(nullable=False), + 'name': fields.StringField(), + 'hosts': fields.ListOfStringsField(nullable=True), + 'metadata': fields.DictOfStringsField(nullable=True), + } + + obj_extra_fields = ['availability_zone'] + + def __init__(self, *args, **kwargs): + super(Aggregate, self).__init__(*args, **kwargs) + self._in_api = False + + @staticmethod + def _from_db_object(context, aggregate, db_aggregate): + for key in aggregate.fields: + if key == 'metadata': + db_key = 'metadetails' + elif key in DEPRECATED_FIELDS and key not in db_aggregate: + continue + else: + db_key = key + setattr(aggregate, key, db_aggregate[db_key]) + + # NOTE: This can be removed when we remove compatibility with + # the old aggregate model. 
+ if any(f not in db_aggregate for f in DEPRECATED_FIELDS): + aggregate.deleted_at = None + aggregate.deleted = False + + aggregate._context = context + aggregate.obj_reset_changes() + + return aggregate + + def _assert_no_hosts(self, action): + if 'hosts' in self.obj_what_changed(): + raise exception.ObjectActionError( + action=action, + reason='hosts updated inline') + + @property + def in_api(self): + if self._in_api: + return True + else: + try: + _aggregate_get_from_db(self._context, self.id) + self._in_api = True + except exception.AggregateNotFound: + pass + return self._in_api + + @base.remotable_classmethod + def get_by_id(cls, context, aggregate_id): + try: + db_aggregate = _aggregate_get_from_db(context, aggregate_id) + except exception.AggregateNotFound: + db_aggregate = db.aggregate_get(context, aggregate_id) + return cls._from_db_object(context, cls(), db_aggregate) + + @base.remotable_classmethod + def get_by_uuid(cls, context, aggregate_uuid): + try: + db_aggregate = _aggregate_get_from_db_by_uuid(context, + aggregate_uuid) + except exception.AggregateNotFound: + db_aggregate = db.aggregate_get_by_uuid(context, aggregate_uuid) + return cls._from_db_object(context, cls(), db_aggregate) + + @staticmethod + @db_api.pick_context_manager_reader + def _ensure_migrated(context): + result = context.session.query(main_models.Aggregate).\ + filter_by(deleted=0).count() + if result: + LOG.warning( + 'Main database contains %(count)i unmigrated aggregates', + {'count': result}) + return result == 0 + + @base.remotable + def create(self): + if self.obj_attr_is_set('id'): + raise exception.ObjectActionError(action='create', + reason='already created') + + # NOTE(mdoff): Once we have made it past a point where we know + # all aggregates have been migrated, we can remove this. Ideally + # in Ocata with a blocker migration to be sure. 
+ if not self._ensure_migrated(self._context): + raise exception.ObjectActionError( + action='create', + reason='main database still contains aggregates') + + self._assert_no_hosts('create') + updates = self.obj_get_changes() + payload = dict(updates) + if 'metadata' in updates: + # NOTE(danms): For some reason the notification format is weird + payload['meta_data'] = payload.pop('metadata') + if 'uuid' not in updates: + updates['uuid'] = uuidutils.generate_uuid() + self.uuid = updates['uuid'] + LOG.debug('Generated uuid %(uuid)s for aggregate', + dict(uuid=updates['uuid'])) + compute_utils.notify_about_aggregate_update(self._context, + "create.start", + payload) + compute_utils.notify_about_aggregate_action( + context=self._context, + aggregate=self, + action=fields.NotificationAction.CREATE, + phase=fields.NotificationPhase.START) + + metadata = updates.pop('metadata', None) + db_aggregate = _aggregate_create_in_db(self._context, updates, + metadata=metadata) + self._from_db_object(self._context, self, db_aggregate) + payload['aggregate_id'] = self.id + compute_utils.notify_about_aggregate_update(self._context, + "create.end", + payload) + compute_utils.notify_about_aggregate_action( + context=self._context, + aggregate=self, + action=fields.NotificationAction.CREATE, + phase=fields.NotificationPhase.END) + + @base.remotable + def save(self): + self._assert_no_hosts('save') + updates = self.obj_get_changes() + + payload = {'aggregate_id': self.id} + if 'metadata' in updates: + payload['meta_data'] = updates['metadata'] + compute_utils.notify_about_aggregate_update(self._context, + "updateprop.start", + payload) + updates.pop('id', None) + try: + db_aggregate = _aggregate_update_to_db(self._context, + self.id, updates) + except exception.AggregateNotFound: + db_aggregate = db.aggregate_update(self._context, self.id, updates) + + compute_utils.notify_about_aggregate_update(self._context, + "updateprop.end", + payload) + self._from_db_object(self._context, self, db_aggregate) + + @base.remotable + def update_metadata(self, updates): + if self.in_api: + metadata_delete = _metadata_delete_from_db + metadata_add = _metadata_add_to_db + else: + metadata_delete = db.aggregate_metadata_delete + metadata_add = db.aggregate_metadata_add + + payload = {'aggregate_id': self.id, + 'meta_data': updates} + compute_utils.notify_about_aggregate_update(self._context, + "updatemetadata.start", + payload) + to_add = {} + for key, value in updates.items(): + if value is None: + try: + metadata_delete(self._context, self.id, key) + except exception.AggregateMetadataNotFound: + pass + try: + self.metadata.pop(key) + except KeyError: + pass + else: + to_add[key] = value + self.metadata[key] = value + metadata_add(self._context, self.id, to_add) + compute_utils.notify_about_aggregate_update(self._context, + "updatemetadata.end", + payload) + self.obj_reset_changes(fields=['metadata']) + + @base.remotable + def destroy(self): + try: + _aggregate_delete_from_db(self._context, self.id) + except exception.AggregateNotFound: + db.aggregate_delete(self._context, self.id) + + @base.remotable + def add_host(self, host): + if self.in_api: + _host_add_to_db(self._context, self.id, host) + else: + db.aggregate_host_add(self._context, self.id, host) + + if self.hosts is None: + self.hosts = [] + self.hosts.append(host) + self.obj_reset_changes(fields=['hosts']) + + @base.remotable + def delete_host(self, host): + if self.in_api: + _host_delete_from_db(self._context, self.id, host) + else: + 
db.aggregate_host_delete(self._context, self.id, host) + + self.hosts.remove(host) + self.obj_reset_changes(fields=['hosts']) + + @property + def availability_zone(self): + return self.metadata.get('availability_zone', None) + + +@db_api.api_context_manager.reader +def _get_all_from_db(context): + query = context.session.query(api_models.Aggregate).\ + options(joinedload('_hosts')).\ + options(joinedload('_metadata')) + + return query.all() + + +@db_api.api_context_manager.reader +def _get_by_host_from_db(context, host, key=None): + query = context.session.query(api_models.Aggregate).\ + options(joinedload('_hosts')).\ + options(joinedload('_metadata')) + query = query.join('_hosts') + query = query.filter(api_models.AggregateHost.host == host) + + if key: + query = query.join("_metadata").filter( + api_models.AggregateMetadata.key == key) + + return query.all() + + +@db_api.api_context_manager.reader +def _get_by_metadata_key_from_db(context, key): + query = context.session.query(api_models.Aggregate) + query = query.join("_metadata") + query = query.filter(api_models.AggregateMetadata.key == key) + query = query.options(contains_eager("_metadata")) + query = query.options(joinedload("_hosts")) + + return query.all() + + +@base.NovaObjectRegistry.register +class AggregateList(base.ObjectListBase, base.NovaObject): + # Version 1.0: Initial version + # Version 1.1: Added key argument to get_by_host() + # Aggregate <= version 1.1 + # Version 1.2: Added get_by_metadata_key + VERSION = '1.2' + + fields = { + 'objects': fields.ListOfObjectsField('Aggregate'), + } + + # NOTE(mdoff): Calls to this can be removed when we remove + # compatibility with the old aggregate model. + @staticmethod + def _fill_deprecated(db_aggregate): + db_aggregate['deleted_at'] = None + db_aggregate['deleted'] = False + return db_aggregate + + @classmethod + def _filter_db_aggregates(cls, db_aggregates, hosts): + if not isinstance(hosts, set): + hosts = set(hosts) + filtered_aggregates = [] + for db_aggregate in db_aggregates: + for host in db_aggregate['hosts']: + if host in hosts: + filtered_aggregates.append(db_aggregate) + break + return filtered_aggregates + + @base.remotable_classmethod + def get_all(cls, context): + api_db_aggregates = [cls._fill_deprecated(agg) for agg in + _get_all_from_db(context)] + db_aggregates = db.aggregate_get_all(context) + return base.obj_make_list(context, cls(context), objects.Aggregate, + db_aggregates + api_db_aggregates) + + @base.remotable_classmethod + def get_by_host(cls, context, host, key=None): + api_db_aggregates = [cls._fill_deprecated(agg) for agg in + _get_by_host_from_db(context, host, key=key)] + db_aggregates = db.aggregate_get_by_host(context, host, key=key) + return base.obj_make_list(context, cls(context), objects.Aggregate, + db_aggregates + api_db_aggregates) + + @base.remotable_classmethod + def get_by_metadata_key(cls, context, key, hosts=None): + api_db_aggregates = [cls._fill_deprecated(agg) for agg in + _get_by_metadata_key_from_db(context, key=key)] + db_aggregates = db.aggregate_get_by_metadata_key(context, key=key) + + all_aggregates = db_aggregates + api_db_aggregates + if hosts is not None: + all_aggregates = cls._filter_db_aggregates(all_aggregates, hosts) + return base.obj_make_list(context, cls(context), objects.Aggregate, + all_aggregates) + + +@db_api.pick_context_manager_reader +def _get_main_db_aggregate_ids(context, limit): + from nova.db.sqlalchemy import models + return [x[0] for x in context.session.query(models.Aggregate.id). 
+ filter_by(deleted=0). + limit(limit)] + + +def migrate_aggregates(ctxt, count): + main_db_ids = _get_main_db_aggregate_ids(ctxt, count) + if not main_db_ids: + return 0, 0 + + count_all = len(main_db_ids) + count_hit = 0 + + for aggregate_id in main_db_ids: + try: + aggregate = Aggregate.get_by_id(ctxt, aggregate_id) + remove = ['metadata', 'hosts'] + values = {field: getattr(aggregate, field) + for field in aggregate.fields if field not in remove} + _aggregate_create_in_db(ctxt, values, metadata=aggregate.metadata) + for host in aggregate.hosts: + _host_add_to_db(ctxt, aggregate_id, host) + count_hit += 1 + db.aggregate_delete(ctxt, aggregate.id) + except exception.AggregateNotFound: + LOG.warning( + 'Aggregate id %(id)i disappeared during migration', + {'id': aggregate_id}) + except (exception.AggregateNameExists) as e: + LOG.error(six.text_type(e)) + + return count_all, count_hit + + +def _adjust_autoincrement(context, value): + engine = db_api.get_api_engine() + if engine.name == 'postgresql': + # NOTE(danms): If we migrated some aggregates in the above function, + # then we will have confused postgres' sequence for the autoincrement + # primary key. MySQL does not care about this, but since postgres does, + # we need to reset this to avoid a failure on the next aggregate + # creation. + engine.execute( + text('ALTER SEQUENCE aggregates_id_seq RESTART WITH %i;' % ( + value))) + + +@db_api.api_context_manager.reader +def _get_max_aggregate_id(context): + return context.session.query(func.max(api_models.Aggregate.id)).one()[0] + + +def migrate_aggregate_reset_autoincrement(ctxt, count): + max_id = _get_max_aggregate_id(ctxt) or 0 + _adjust_autoincrement(ctxt, max_id + 1) + return 0, 0 diff --git a/nova/objects/instance_fault.py b/nova/objects/instance_fault.py new file mode 100644 index 00000000..46db8bd8 --- /dev/null +++ b/nova/objects/instance_fault.py @@ -0,0 +1,120 @@ +# Copyright 2013 IBM Corp. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
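[Editor's sketch, not part of the patch] The retry loop in _metadata_add_to_db() above exists because a concurrent transaction can insert the same metadata key first; the write is retried and the duplicate error is only propagated once the final attempt fails. A minimal standalone version of that strategy, with a hypothetical DuplicateEntry standing in for oslo.db's DBDuplicateEntry and a toy write callable instead of the real session work:

class DuplicateEntry(Exception):
    pass


def add_metadata_with_retry(write_attempt, max_retries=10):
    for attempt in range(max_retries):
        try:
            return write_attempt()
        except DuplicateEntry:
            if attempt == max_retries - 1:
                # last attempt: give up and let the caller see the conflict
                raise
            # otherwise another writer won the race; loop and try again


attempts = {'count': 0}


def flaky_write():
    # succeeds only on the third try, simulating two lost races
    attempts['count'] += 1
    if attempts['count'] < 3:
        raise DuplicateEntry()
    return {'ssd': 'true'}


assert add_metadata_with_retry(flaky_write) == {'ssd': 'true'}
assert attempts['count'] == 3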
+ +import itertools + +from oslo_log import log as logging + +from nova.cells import opts as cells_opts +from nova.cells import rpcapi as cells_rpcapi +from nova import db +from nova import exception +from nova import objects +from nova.objects import base +from nova.objects import fields + + +LOG = logging.getLogger(__name__) + + +# TODO(berrange): Remove NovaObjectDictCompat +@base.NovaObjectRegistry.register +class InstanceFault(base.NovaPersistentObject, base.NovaObject, + base.NovaObjectDictCompat): + # Version 1.0: Initial version + # Version 1.1: String attributes updated to support unicode + # Version 1.2: Added create() + VERSION = '1.2' + + fields = { + 'id': fields.IntegerField(), + 'instance_uuid': fields.UUIDField(), + 'code': fields.IntegerField(), + 'message': fields.StringField(nullable=True), + 'details': fields.StringField(nullable=True), + 'host': fields.StringField(nullable=True), + } + + @staticmethod + def _from_db_object(context, fault, db_fault): + # NOTE(danms): These are identical right now + for key in fault.fields: + fault[key] = db_fault[key] + fault._context = context + fault.obj_reset_changes() + return fault + + @base.remotable_classmethod + def get_latest_for_instance(cls, context, instance_uuid): + db_faults = db.instance_fault_get_by_instance_uuids(context, + [instance_uuid]) + if instance_uuid in db_faults and db_faults[instance_uuid]: + return cls._from_db_object(context, cls(), + db_faults[instance_uuid][0]) + + @base.remotable + def create(self): + if self.obj_attr_is_set('id'): + raise exception.ObjectActionError(action='create', + reason='already created') + values = { + 'instance_uuid': self.instance_uuid, + 'code': self.code, + 'message': self.message, + 'details': self.details, + 'host': self.host, + } + db_fault = db.instance_fault_create(self._context, values) + self._from_db_object(self._context, self, db_fault) + self.obj_reset_changes() + # Cells should only try sending a message over to nova-cells + # if cells is enabled and we're not the API cell. Otherwise, + # if the API cell is calling this, we could end up with + # infinite recursion. 
+ if cells_opts.get_cell_type() == 'compute': + try: + cells_rpcapi.CellsAPI().instance_fault_create_at_top( + self._context, db_fault) + except Exception: + LOG.exception("Failed to notify cells of instance fault") + + +@base.NovaObjectRegistry.register +class InstanceFaultList(base.ObjectListBase, base.NovaObject): + # Version 1.0: Initial version + # InstanceFault <= version 1.1 + # Version 1.1: InstanceFault version 1.2 + # Version 1.2: Added get_latest_by_instance_uuids() method + VERSION = '1.2' + + fields = { + 'objects': fields.ListOfObjectsField('InstanceFault'), + } + + @base.remotable_classmethod + def get_latest_by_instance_uuids(cls, context, instance_uuids): + db_faultdict = db.instance_fault_get_by_instance_uuids(context, + instance_uuids, + latest=True) + db_faultlist = itertools.chain(*db_faultdict.values()) + return base.obj_make_list(context, cls(context), objects.InstanceFault, + db_faultlist) + + @base.remotable_classmethod + def get_by_instance_uuids(cls, context, instance_uuids): + db_faultdict = db.instance_fault_get_by_instance_uuids(context, + instance_uuids) + db_faultlist = itertools.chain(*db_faultdict.values()) + return base.obj_make_list(context, cls(context), objects.InstanceFault, + db_faultlist) diff --git a/nova/objects/instance_group.py b/nova/objects/instance_group.py new file mode 100644 index 00000000..53d4bb1c --- /dev/null +++ b/nova/objects/instance_group.py @@ -0,0 +1,592 @@ +# Copyright (c) 2013 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
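[Editor's sketch, not part of the patch] InstanceFaultList.get_latest_by_instance_uuids() and get_by_instance_uuids() above flatten a per-instance dict of fault rows with itertools.chain before handing the result to obj_make_list(). A standalone illustration of that transformation over hypothetical fault rows:

import itertools

# what db.instance_fault_get_by_instance_uuids() returns: one list of fault
# rows per instance UUID (the UUIDs and rows here are made up)
db_faultdict = {
    'uuid-1': [{'id': 3, 'code': 500}, {'id': 1, 'code': 500}],
    'uuid-2': [{'id': 2, 'code': 404}],
}

# chain(*values) yields one flat iterable of fault rows across all instances
db_faultlist = list(itertools.chain(*db_faultdict.values()))

assert {fault['id'] for fault in db_faultlist} == {1, 2, 3}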
+ +import copy + +from oslo_db import exception as db_exc +from oslo_utils import uuidutils +from oslo_utils import versionutils +from sqlalchemy.orm import contains_eager +from sqlalchemy.orm import joinedload + +from nova.compute import utils as compute_utils +from nova import db +from nova.db.sqlalchemy import api as db_api +from nova.db.sqlalchemy import api_models +from nova.db.sqlalchemy import models as main_models +from nova import exception +from nova import objects +from nova.objects import base +from nova.objects import fields + + +LAZY_LOAD_FIELDS = ['hosts'] + + +def _instance_group_get_query(context, id_field=None, id=None): + query = context.session.query(api_models.InstanceGroup).\ + options(joinedload('_policies')).\ + options(joinedload('_members')) + if not context.is_admin: + query = query.filter_by(project_id=context.project_id) + if id and id_field: + query = query.filter(id_field == id) + return query + + +def _instance_group_model_get_query(context, model_class, group_id): + return context.session.query(model_class).filter_by(group_id=group_id) + + +def _instance_group_model_add(context, model_class, items, item_models, field, + group_id, append_to_models=None): + models = [] + already_existing = set() + for db_item in item_models: + already_existing.add(getattr(db_item, field)) + models.append(db_item) + for item in items: + if item in already_existing: + continue + model = model_class() + values = {'group_id': group_id} + values[field] = item + model.update(values) + context.session.add(model) + if append_to_models: + append_to_models.append(model) + models.append(model) + return models + + +def _instance_group_policies_add(context, group, policies): + query = _instance_group_model_get_query(context, + api_models.InstanceGroupPolicy, + group.id) + query = query.filter( + api_models.InstanceGroupPolicy.policy.in_(set(policies))) + return _instance_group_model_add(context, api_models.InstanceGroupPolicy, + policies, query.all(), 'policy', group.id, + append_to_models=group._policies) + + +def _instance_group_members_add(context, group, members): + query = _instance_group_model_get_query(context, + api_models.InstanceGroupMember, + group.id) + query = query.filter( + api_models.InstanceGroupMember.instance_uuid.in_(set(members))) + return _instance_group_model_add(context, api_models.InstanceGroupMember, + members, query.all(), 'instance_uuid', + group.id, append_to_models=group._members) + + +def _instance_group_members_add_by_uuid(context, group_uuid, members): + # NOTE(melwitt): The condition on the join limits the number of members + # returned to only those we wish to check as already existing. 
+ group = context.session.query(api_models.InstanceGroup).\ + outerjoin(api_models.InstanceGroupMember, + api_models.InstanceGroupMember.instance_uuid.in_(set(members))).\ + filter(api_models.InstanceGroup.uuid == group_uuid).\ + options(contains_eager('_members')).first() + if not group: + raise exception.InstanceGroupNotFound(group_uuid=group_uuid) + return _instance_group_model_add(context, api_models.InstanceGroupMember, + members, group._members, 'instance_uuid', + group.id) + + +# TODO(berrange): Remove NovaObjectDictCompat +@base.NovaObjectRegistry.register +class InstanceGroup(base.NovaPersistentObject, base.NovaObject, + base.NovaObjectDictCompat): + # Version 1.0: Initial version + # Version 1.1: String attributes updated to support unicode + # Version 1.2: Use list/dict helpers for policies, metadetails, members + # Version 1.3: Make uuid a non-None real string + # Version 1.4: Add add_members() + # Version 1.5: Add get_hosts() + # Version 1.6: Add get_by_name() + # Version 1.7: Deprecate metadetails + # Version 1.8: Add count_members_by_user() + # Version 1.9: Add get_by_instance_uuid() + # Version 1.10: Add hosts field + VERSION = '1.10' + + fields = { + 'id': fields.IntegerField(), + + 'user_id': fields.StringField(nullable=True), + 'project_id': fields.StringField(nullable=True), + + 'uuid': fields.UUIDField(), + 'name': fields.StringField(nullable=True), + + 'policies': fields.ListOfStringsField(nullable=True), + 'members': fields.ListOfStringsField(nullable=True), + 'hosts': fields.ListOfStringsField(nullable=True), + } + + def obj_make_compatible(self, primitive, target_version): + target_version = versionutils.convert_version_to_tuple(target_version) + if target_version < (1, 7): + # NOTE(danms): Before 1.7, we had an always-empty + # metadetails property + primitive['metadetails'] = {} + + @staticmethod + def _from_db_object(context, instance_group, db_inst): + """Method to help with migration to objects. + + Converts a database entity to a formal object. + """ + # Most of the field names match right now, so be quick + for field in instance_group.fields: + if field in LAZY_LOAD_FIELDS: + continue + # This is needed to handle db models from both the api + # database and the main database. In the migration to + # the api database, we have removed soft-delete, so + # the object fields for delete must be filled in with + # default values for db models from the api database. 
+ ignore = {'deleted': False, + 'deleted_at': None} + if field in ignore and not hasattr(db_inst, field): + instance_group[field] = ignore[field] + else: + instance_group[field] = db_inst[field] + + instance_group._context = context + instance_group.obj_reset_changes() + return instance_group + + @staticmethod + @db_api.api_context_manager.reader + def _get_from_db_by_uuid(context, uuid): + grp = _instance_group_get_query(context, + id_field=api_models.InstanceGroup.uuid, + id=uuid).first() + if not grp: + raise exception.InstanceGroupNotFound(group_uuid=uuid) + return grp + + @staticmethod + @db_api.api_context_manager.reader + def _get_from_db_by_id(context, id): + grp = _instance_group_get_query(context, + id_field=api_models.InstanceGroup.id, + id=id).first() + if not grp: + raise exception.InstanceGroupNotFound(group_uuid=id) + return grp + + @staticmethod + @db_api.api_context_manager.reader + def _get_from_db_by_name(context, name): + grp = _instance_group_get_query(context).filter_by(name=name).first() + if not grp: + raise exception.InstanceGroupNotFound(group_uuid=name) + return grp + + @staticmethod + @db_api.api_context_manager.reader + def _get_from_db_by_instance(context, instance_uuid): + grp_member = context.session.query(api_models.InstanceGroupMember).\ + filter_by(instance_uuid=instance_uuid).first() + if not grp_member: + raise exception.InstanceGroupNotFound(group_uuid='') + grp = InstanceGroup._get_from_db_by_id(context, grp_member.group_id) + return grp + + @staticmethod + @db_api.api_context_manager.writer + def _save_in_db(context, group_uuid, values): + grp = _instance_group_get_query(context, + id_field=api_models.InstanceGroup.uuid, + id=group_uuid).first() + if not grp: + raise exception.InstanceGroupNotFound(group_uuid=group_uuid) + + values_copy = copy.copy(values) + policies = values_copy.pop('policies', None) + members = values_copy.pop('members', None) + + grp.update(values_copy) + + if policies is not None: + _instance_group_policies_add(context, grp, policies) + if members is not None: + _instance_group_members_add(context, grp, members) + + return grp + + @staticmethod + @db_api.api_context_manager.writer + def _create_in_db(context, values, policies=None, members=None): + try: + group = api_models.InstanceGroup() + group.update(values) + group.save(context.session) + except db_exc.DBDuplicateEntry: + raise exception.InstanceGroupIdExists(group_uuid=values['uuid']) + + if policies: + group._policies = _instance_group_policies_add(context, group, + policies) + else: + group._policies = [] + + if members: + group._members = _instance_group_members_add(context, group, + members) + else: + group._members = [] + + return group + + @staticmethod + @db_api.api_context_manager.writer + def _destroy_in_db(context, group_uuid): + qry = _instance_group_get_query(context, + id_field=api_models.InstanceGroup.uuid, + id=group_uuid) + if qry.count() == 0: + raise exception.InstanceGroupNotFound(group_uuid=group_uuid) + + # Delete policies and members + group_id = qry.first().id + instance_models = [api_models.InstanceGroupPolicy, + api_models.InstanceGroupMember] + for model in instance_models: + context.session.query(model).filter_by(group_id=group_id).delete() + + qry.delete() + + @staticmethod + @db_api.api_context_manager.writer + def _add_members_in_db(context, group_uuid, members): + return _instance_group_members_add_by_uuid(context, group_uuid, + members) + + @staticmethod + @db_api.api_context_manager.writer + def _remove_members_in_db(context, group_id, 
instance_uuids): + # There is no public method provided for removing members because the + # user-facing API doesn't allow removal of instance group members. We + # need to be able to remove members to address quota races. + context.session.query(api_models.InstanceGroupMember).\ + filter_by(group_id=group_id).\ + filter(api_models.InstanceGroupMember.instance_uuid. + in_(set(instance_uuids))).\ + delete(synchronize_session=False) + + def obj_load_attr(self, attrname): + # NOTE(sbauza): Only hosts could be lazy-loaded right now + if attrname != 'hosts': + raise exception.ObjectActionError( + action='obj_load_attr', reason='unable to load %s' % attrname) + + self.hosts = self.get_hosts() + self.obj_reset_changes(['hosts']) + + @base.remotable_classmethod + def get_by_uuid(cls, context, uuid): + db_group = None + try: + db_group = cls._get_from_db_by_uuid(context, uuid) + except exception.InstanceGroupNotFound: + pass + if db_group is None: + db_group = db.instance_group_get(context, uuid) + return cls._from_db_object(context, cls(), db_group) + + @base.remotable_classmethod + def get_by_name(cls, context, name): + try: + db_group = cls._get_from_db_by_name(context, name) + except exception.InstanceGroupNotFound: + igs = InstanceGroupList._get_main_by_project_id(context, + context.project_id) + for ig in igs: + if ig.name == name: + return ig + raise exception.InstanceGroupNotFound(group_uuid=name) + return cls._from_db_object(context, cls(), db_group) + + @base.remotable_classmethod + def get_by_instance_uuid(cls, context, instance_uuid): + db_group = None + try: + db_group = cls._get_from_db_by_instance(context, instance_uuid) + except exception.InstanceGroupNotFound: + pass + if db_group is None: + db_group = db.instance_group_get_by_instance(context, + instance_uuid) + return cls._from_db_object(context, cls(), db_group) + + @classmethod + def get_by_hint(cls, context, hint): + if uuidutils.is_uuid_like(hint): + return cls.get_by_uuid(context, hint) + else: + return cls.get_by_name(context, hint) + + @base.remotable + def save(self): + """Save updates to this instance group.""" + + updates = self.obj_get_changes() + + # NOTE(sbauza): We do NOT save the set of compute nodes that an + # instance group is connected to in this method. Instance groups are + # implicitly connected to compute nodes when the + # InstanceGroup.add_members() method is called, which adds the mapping + # table entries. + # So, since the only way to have hosts in the updates is to set that + # field explicitly, we prefer to raise an Exception so the developer + # knows he has to call obj_reset_changes(['hosts']) right after setting + # the field. 
+ if 'hosts' in updates: + raise exception.InstanceGroupSaveException(field='hosts') + + if not updates: + return + + payload = dict(updates) + payload['server_group_id'] = self.uuid + + try: + db_group = self._save_in_db(self._context, self.uuid, updates) + except exception.InstanceGroupNotFound: + db.instance_group_update(self._context, self.uuid, updates) + db_group = db.instance_group_get(self._context, self.uuid) + self._from_db_object(self._context, self, db_group) + compute_utils.notify_about_server_group_update(self._context, + "update", payload) + + @base.remotable + def refresh(self): + """Refreshes the instance group.""" + current = self.__class__.get_by_uuid(self._context, self.uuid) + for field in self.fields: + if self.obj_attr_is_set(field) and self[field] != current[field]: + self[field] = current[field] + self.obj_reset_changes() + + def _create(self, skipcheck=False): + # NOTE(danms): This is just for the migration routine, and + # can be removed once we're no longer supporting the migration + # of instance groups from the main to api database. + if self.obj_attr_is_set('id'): + raise exception.ObjectActionError(action='create', + reason='already created') + updates = self.obj_get_changes() + payload = dict(updates) + updates.pop('id', None) + policies = updates.pop('policies', None) + members = updates.pop('members', None) + + if 'uuid' not in updates: + self.uuid = uuidutils.generate_uuid() + updates['uuid'] = self.uuid + + if not skipcheck: + try: + db.instance_group_get(self._context, self.uuid) + raise exception.ObjectActionError( + action='create', + reason='already created in main') + except exception.InstanceGroupNotFound: + pass + db_group = self._create_in_db(self._context, updates, + policies=policies, + members=members) + self._from_db_object(self._context, self, db_group) + payload['server_group_id'] = self.uuid + compute_utils.notify_about_server_group_update(self._context, + "create", payload) + compute_utils.notify_about_server_group_action( + context=self._context, + group=self, + action=fields.NotificationAction.CREATE) + + @base.remotable + def create(self): + self._create() + + @base.remotable + def destroy(self): + payload = {'server_group_id': self.uuid} + try: + self._destroy_in_db(self._context, self.uuid) + except exception.InstanceGroupNotFound: + db.instance_group_delete(self._context, self.uuid) + self.obj_reset_changes() + compute_utils.notify_about_server_group_update(self._context, + "delete", payload) + compute_utils.notify_about_server_group_action( + context=self._context, + group=self, + action=fields.NotificationAction.DELETE) + + @base.remotable_classmethod + def add_members(cls, context, group_uuid, instance_uuids): + payload = {'server_group_id': group_uuid, + 'instance_uuids': instance_uuids} + try: + members = cls._add_members_in_db(context, group_uuid, + instance_uuids) + members = [member['instance_uuid'] for member in members] + except exception.InstanceGroupNotFound: + members = db.instance_group_members_add(context, group_uuid, + instance_uuids) + compute_utils.notify_about_server_group_update(context, + "addmember", payload) + return list(members) + + @base.remotable + def get_hosts(self, exclude=None): + """Get a list of hosts for non-deleted instances in the group + + This method allows you to get a list of the hosts where instances in + this group are currently running. There's also an option to exclude + certain instance UUIDs from this calculation. 
+ + """ + filter_uuids = self.members + if exclude: + filter_uuids = set(filter_uuids) - set(exclude) + filters = {'uuid': filter_uuids, 'deleted': False} + instances = objects.InstanceList.get_by_filters(self._context, + filters=filters) + return list(set([instance.host for instance in instances + if instance.host])) + + @base.remotable + def count_members_by_user(self, user_id): + """Count the number of instances in a group belonging to a user.""" + filter_uuids = self.members + filters = {'uuid': filter_uuids, 'user_id': user_id, 'deleted': False} + instances = objects.InstanceList.get_by_filters(self._context, + filters=filters) + return len(instances) + + +@base.NovaObjectRegistry.register +class InstanceGroupList(base.ObjectListBase, base.NovaObject): + # Version 1.0: Initial version + # InstanceGroup <= version 1.3 + # Version 1.1: InstanceGroup <= version 1.4 + # Version 1.2: InstanceGroup <= version 1.5 + # Version 1.3: InstanceGroup <= version 1.6 + # Version 1.4: InstanceGroup <= version 1.7 + # Version 1.5: InstanceGroup <= version 1.8 + # Version 1.6: InstanceGroup <= version 1.9 + # Version 1.7: InstanceGroup <= version 1.10 + # Version 1.8: Added get_counts() for quotas + VERSION = '1.8' + + fields = { + 'objects': fields.ListOfObjectsField('InstanceGroup'), + } + + @staticmethod + @db_api.api_context_manager.reader + def _get_from_db(context, project_id=None): + query = _instance_group_get_query(context) + if project_id is not None: + query = query.filter_by(project_id=project_id) + return query.all() + + @classmethod + def _get_main_by_project_id(cls, context, project_id): + main_db_groups = db.instance_group_get_all_by_project_id(context, + project_id) + return base.obj_make_list(context, cls(context), objects.InstanceGroup, + main_db_groups) + + @staticmethod + @db_api.api_context_manager.reader + def _get_counts_from_db(context, project_id, user_id=None): + query = context.session.query(api_models.InstanceGroup.id).\ + filter_by(project_id=project_id) + counts = {} + counts['project'] = {'server_groups': query.count()} + if user_id: + query = query.filter_by(user_id=user_id) + counts['user'] = {'server_groups': query.count()} + return counts + + @base.remotable_classmethod + def get_by_project_id(cls, context, project_id): + api_db_groups = cls._get_from_db(context, project_id=project_id) + main_db_groups = db.instance_group_get_all_by_project_id(context, + project_id) + return base.obj_make_list(context, cls(context), objects.InstanceGroup, + api_db_groups + main_db_groups) + + @base.remotable_classmethod + def get_all(cls, context): + api_db_groups = cls._get_from_db(context) + main_db_groups = db.instance_group_get_all(context) + return base.obj_make_list(context, cls(context), objects.InstanceGroup, + api_db_groups + main_db_groups) + + @base.remotable_classmethod + def get_counts(cls, context, project_id, user_id=None): + """Get the counts of InstanceGroup objects in the database. + + :param context: The request context for database access + :param project_id: The project_id to count across + :param user_id: The user_id to count across + :returns: A dict containing the project-scoped counts and user-scoped + counts if user_id is specified. 
For example: + + {'project': {'server_groups': }, + 'user': {'server_groups': }} + """ + return cls._get_counts_from_db(context, project_id, user_id=user_id) + + +@db_api.pick_context_manager_reader +def _get_main_instance_groups(context, limit): + return context.session.query(main_models.InstanceGroup).\ + options(joinedload('_policies')).\ + options(joinedload('_members')).\ + filter_by(deleted=0).\ + limit(limit).\ + all() + + +def migrate_instance_groups_to_api_db(context, count): + main_groups = _get_main_instance_groups(context, count) + done = 0 + for db_group in main_groups: + group = objects.InstanceGroup(context=context, + user_id=db_group.user_id, + project_id=db_group.project_id, + uuid=db_group.uuid, + name=db_group.name, + policies=db_group.policies, + members=db_group.members) + try: + group._create(skipcheck=True) + except exception.InstanceGroupIdExists: + # NOTE(melwitt): This might happen if there's a failure right after + # the InstanceGroup was created and the migration is re-run. + pass + db_api.instance_group_delete(context, db_group.uuid) + done += 1 + return len(main_groups), done diff --git a/nova/objects/keypair.py b/nova/objects/keypair.py new file mode 100644 index 00000000..0e0eb758 --- /dev/null +++ b/nova/objects/keypair.py @@ -0,0 +1,284 @@ +# Copyright 2013 IBM Corp. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
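[Editor's sketch, not part of the patch] InstanceGroup.get_hosts() above reduces the group's member UUIDs to the distinct, non-empty hosts their instances are running on, optionally excluding some members. A standalone version of that calculation over hypothetical instance records; the real method fetches the instances through InstanceList.get_by_filters() rather than taking them as an argument.

def hosts_for_members(instances, members, exclude=None):
    # drop excluded members, then collect the distinct non-empty hosts
    filter_uuids = set(members) - set(exclude or [])
    return list({inst['host'] for inst in instances
                 if inst['uuid'] in filter_uuids and inst['host']})


instances = [{'uuid': 'a', 'host': 'compute-1'},
             {'uuid': 'b', 'host': 'compute-1'},
             {'uuid': 'c', 'host': None}]

# excluding member 'b' and ignoring the unscheduled 'c' leaves one host
assert hosts_for_members(instances, ['a', 'b', 'c'],
                         exclude=['b']) == ['compute-1']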
+ +from oslo_db import exception as db_exc +from oslo_db.sqlalchemy import utils as sqlalchemyutils +from oslo_log import log as logging +from oslo_utils import versionutils + +from nova import db +from nova.db.sqlalchemy import api as db_api +from nova.db.sqlalchemy import api_models +from nova.db.sqlalchemy import models as main_models +from nova import exception +from nova import objects +from nova.objects import base +from nova.objects import fields + +KEYPAIR_TYPE_SSH = 'ssh' +KEYPAIR_TYPE_X509 = 'x509' +LOG = logging.getLogger(__name__) + + +@db_api.api_context_manager.reader +def _get_from_db(context, user_id, name=None, limit=None, marker=None): + query = context.session.query(api_models.KeyPair).\ + filter(api_models.KeyPair.user_id == user_id) + if name is not None: + db_keypair = query.filter(api_models.KeyPair.name == name).\ + first() + if not db_keypair: + raise exception.KeypairNotFound(user_id=user_id, name=name) + return db_keypair + + marker_row = None + if marker is not None: + marker_row = context.session.query(api_models.KeyPair).\ + filter(api_models.KeyPair.name == marker).\ + filter(api_models.KeyPair.user_id == user_id).first() + if not marker_row: + raise exception.MarkerNotFound(marker=marker) + + query = sqlalchemyutils.paginate_query( + query, api_models.KeyPair, limit, ['name'], marker=marker_row) + + return query.all() + + +@db_api.api_context_manager.reader +def _get_count_from_db(context, user_id): + return context.session.query(api_models.KeyPair).\ + filter(api_models.KeyPair.user_id == user_id).\ + count() + + +@db_api.api_context_manager.writer +def _create_in_db(context, values): + kp = api_models.KeyPair() + kp.update(values) + try: + kp.save(context.session) + except db_exc.DBDuplicateEntry: + raise exception.KeyPairExists(key_name=values['name']) + return kp + + +@db_api.api_context_manager.writer +def _destroy_in_db(context, user_id, name): + result = context.session.query(api_models.KeyPair).\ + filter_by(user_id=user_id).\ + filter_by(name=name).\ + delete() + if not result: + raise exception.KeypairNotFound(user_id=user_id, name=name) + + +# TODO(berrange): Remove NovaObjectDictCompat +@base.NovaObjectRegistry.register +class KeyPair(base.NovaPersistentObject, base.NovaObject, + base.NovaObjectDictCompat): + # Version 1.0: Initial version + # Version 1.1: String attributes updated to support unicode + # Version 1.2: Added keypair type + # Version 1.3: Name field is non-null + # Version 1.4: Add localonly flag to get_by_name() + VERSION = '1.4' + + fields = { + 'id': fields.IntegerField(), + 'name': fields.StringField(nullable=False), + 'user_id': fields.StringField(nullable=True), + 'fingerprint': fields.StringField(nullable=True), + 'public_key': fields.StringField(nullable=True), + 'type': fields.StringField(nullable=False), + } + + def obj_make_compatible(self, primitive, target_version): + super(KeyPair, self).obj_make_compatible(primitive, target_version) + target_version = versionutils.convert_version_to_tuple(target_version) + if target_version < (1, 2) and 'type' in primitive: + del primitive['type'] + + @staticmethod + def _from_db_object(context, keypair, db_keypair): + ignore = {'deleted': False, + 'deleted_at': None} + for key in keypair.fields: + if key in ignore and not hasattr(db_keypair, key): + keypair[key] = ignore[key] + else: + keypair[key] = db_keypair[key] + keypair._context = context + keypair.obj_reset_changes() + return keypair + + @staticmethod + def _get_from_db(context, user_id, name): + return _get_from_db(context, 
user_id, name=name) + + @staticmethod + def _destroy_in_db(context, user_id, name): + return _destroy_in_db(context, user_id, name) + + @staticmethod + def _create_in_db(context, values): + return _create_in_db(context, values) + + @base.remotable_classmethod + def get_by_name(cls, context, user_id, name, + localonly=False): + db_keypair = None + if not localonly: + try: + db_keypair = cls._get_from_db(context, user_id, name) + except exception.KeypairNotFound: + pass + if db_keypair is None: + db_keypair = db.key_pair_get(context, user_id, name) + return cls._from_db_object(context, cls(), db_keypair) + + @base.remotable_classmethod + def destroy_by_name(cls, context, user_id, name): + try: + cls._destroy_in_db(context, user_id, name) + except exception.KeypairNotFound: + db.key_pair_destroy(context, user_id, name) + + @base.remotable + def create(self): + if self.obj_attr_is_set('id'): + raise exception.ObjectActionError(action='create', + reason='already created') + + # NOTE(danms): Check to see if it exists in the old DB before + # letting them create in the API DB, since we won't get protection + # from the UC. + try: + db.key_pair_get(self._context, self.user_id, self.name) + raise exception.KeyPairExists(key_name=self.name) + except exception.KeypairNotFound: + pass + + self._create() + + def _create(self): + updates = self.obj_get_changes() + db_keypair = self._create_in_db(self._context, updates) + self._from_db_object(self._context, self, db_keypair) + + @base.remotable + def destroy(self): + try: + self._destroy_in_db(self._context, self.user_id, self.name) + except exception.KeypairNotFound: + db.key_pair_destroy(self._context, self.user_id, self.name) + + +@base.NovaObjectRegistry.register +class KeyPairList(base.ObjectListBase, base.NovaObject): + # Version 1.0: Initial version + # KeyPair <= version 1.1 + # Version 1.1: KeyPair <= version 1.2 + # Version 1.2: KeyPair <= version 1.3 + # Version 1.3: Add new parameters 'limit' and 'marker' to get_by_user() + VERSION = '1.3' + + fields = { + 'objects': fields.ListOfObjectsField('KeyPair'), + } + + @staticmethod + def _get_from_db(context, user_id, limit, marker): + return _get_from_db(context, user_id, limit=limit, marker=marker) + + @staticmethod + def _get_count_from_db(context, user_id): + return _get_count_from_db(context, user_id) + + @base.remotable_classmethod + def get_by_user(cls, context, user_id, limit=None, marker=None): + try: + api_db_keypairs = cls._get_from_db( + context, user_id, limit=limit, marker=marker) + # NOTE(pkholkin): If we were asked for a marker and found it in + # results from the API DB, we must continue our pagination with + # just the limit (if any) to the main DB. 
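# (Reviewer note, not part of this patch.) Clearing the marker below means
# the follow-up cell-database query starts from its first row: the API-DB
# results above already cover everything up to the caller's marker, so only
# the remaining 'limit_more' records need to come from the main database.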
+ marker = None + except exception.MarkerNotFound: + api_db_keypairs = [] + + if limit is not None: + limit_more = limit - len(api_db_keypairs) + else: + limit_more = None + + if limit_more is None or limit_more > 0: + main_db_keypairs = db.key_pair_get_all_by_user( + context, user_id, limit=limit_more, marker=marker) + else: + main_db_keypairs = [] + + return base.obj_make_list(context, cls(context), objects.KeyPair, + api_db_keypairs + main_db_keypairs) + + @base.remotable_classmethod + def get_count_by_user(cls, context, user_id): + return (cls._get_count_from_db(context, user_id) + + db.key_pair_count_by_user(context, user_id)) + + +@db_api.pick_context_manager_reader +def _count_unmigrated_instances(context): + return context.session.query(main_models.InstanceExtra).\ + filter_by(keypairs=None).\ + filter_by(deleted=0).\ + count() + + +@db_api.pick_context_manager_reader +def _get_main_keypairs(context, limit): + return context.session.query(main_models.KeyPair).\ + filter_by(deleted=0).\ + limit(limit).\ + all() + + +def migrate_keypairs_to_api_db(context, count): + bad_instances = _count_unmigrated_instances(context) + if bad_instances: + LOG.error('Some instances are still missing keypair ' + 'information. Unable to run keypair migration ' + 'at this time.') + return 0, 0 + + main_keypairs = _get_main_keypairs(context, count) + done = 0 + for db_keypair in main_keypairs: + kp = objects.KeyPair(context=context, + user_id=db_keypair.user_id, + name=db_keypair.name, + fingerprint=db_keypair.fingerprint, + public_key=db_keypair.public_key, + type=db_keypair.type) + try: + kp._create() + except exception.KeyPairExists: + # NOTE(danms): If this got created somehow in the API DB, + # then it's newer and we just continue on to destroy the + # old one in the cell DB. + pass + db_api.key_pair_destroy(context, db_keypair.user_id, db_keypair.name) + done += 1 + + return len(main_keypairs), done diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py new file mode 100644 index 00000000..963cd73b --- /dev/null +++ b/nova/scheduler/client/report.py @@ -0,0 +1,1238 @@ +# Copyright (c) 2014 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
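Reviewer note (not part of the patch): a small sketch of how the merged API-DB/cell-DB keypair listing and counting above might be exercised; the user ID and page size are placeholders.

# Hypothetical usage sketch for KeyPairList.get_by_user() pagination and
# get_count_by_user() above; 'some-user' is a placeholder user ID.
from nova import context as nova_context
from nova import objects

ctxt = nova_context.get_admin_context()

# First page: up to 5 keypairs, drawn from the API database first and
# then, if fewer than 5 were found there, from the cell (main) database.
page = objects.KeyPairList.get_by_user(ctxt, 'some-user', limit=5)

# Next page: pagination is by keypair name, so the marker is the last
# name returned on the previous page.
if page.objects:
    next_page = objects.KeyPairList.get_by_user(
        ctxt, 'some-user', limit=5, marker=page.objects[-1].name)

# Total across both databases, e.g. for quota checks.
total = objects.KeyPairList.get_count_by_user(ctxt, 'some-user')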
+ +import copy +import functools +import re +import time + +from keystoneauth1 import exceptions as ks_exc +from keystoneauth1 import loading as keystone +from oslo_log import log as logging +from six.moves.urllib import parse + +from nova.compute import utils as compute_utils +import nova.conf +from nova import exception +from nova.i18n import _LE, _LI, _LW +from nova import objects +from nova.objects import fields +from nova.scheduler import utils as scheduler_utils +from nova import utils + +CONF = nova.conf.CONF +LOG = logging.getLogger(__name__) +VCPU = fields.ResourceClass.VCPU +MEMORY_MB = fields.ResourceClass.MEMORY_MB +DISK_GB = fields.ResourceClass.DISK_GB +_RE_INV_IN_USE = re.compile("Inventory for (.+) on resource provider " + "(.+) in use") +WARN_EVERY = 10 +PLACEMENT_CLIENT_SEMAPHORE = 'placement_client' + + +def warn_limit(self, msg): + if self._warn_count: + self._warn_count -= 1 + else: + self._warn_count = WARN_EVERY + LOG.warning(msg) + + +def safe_connect(f): + @functools.wraps(f) + def wrapper(self, *a, **k): + try: + return f(self, *a, **k) + except ks_exc.EndpointNotFound: + warn_limit( + self, + _LW('The placement API endpoint not found. Placement is ' + 'optional in Newton, but required in Ocata. Please ' + 'enable the placement service before upgrading.')) + # Reset client session so there is a new catalog, which + # gets cached when keystone is first successfully contacted. + self._client = self._create_client() + except ks_exc.MissingAuthPlugin: + warn_limit( + self, + _LW('No authentication information found for placement ' + 'API. Placement is optional in Newton, but required ' + 'in Ocata. Please enable the placement service ' + 'before upgrading.')) + except ks_exc.Unauthorized: + warn_limit( + self, + _LW('Placement service credentials do not work. ' + 'Placement is optional in Newton, but required ' + 'in Ocata. Please enable the placement service ' + 'before upgrading.')) + except ks_exc.DiscoveryFailure: + # TODO(_gryf): Looks like DiscoveryFailure is not the only missing + # exception here. In Pike we should take care about keystoneauth1 + # failures handling globally. + warn_limit(self, + _LW('Discovering suitable URL for placement API ' + 'failed.')) + except ks_exc.ConnectFailure: + msg = _LW('Placement API service is not responding.') + LOG.warning(msg) + return wrapper + + +def _compute_node_to_inventory_dict(compute_node): + """Given a supplied `objects.ComputeNode` object, return a dict, keyed + by resource class, of various inventory information. + + :param compute_node: `objects.ComputeNode` object to translate + """ + result = {} + + # NOTE(jaypipes): Ironic virt driver will return 0 values for vcpus, + # memory_mb and disk_gb if the Ironic node is not available/operable + if compute_node.vcpus > 0: + result[VCPU] = { + 'total': compute_node.vcpus, + 'reserved': CONF.reserved_host_cpus, + 'min_unit': 1, + 'max_unit': compute_node.vcpus, + 'step_size': 1, + 'allocation_ratio': compute_node.cpu_allocation_ratio, + } + if compute_node.memory_mb > 0: + result[MEMORY_MB] = { + 'total': compute_node.memory_mb, + 'reserved': CONF.reserved_host_memory_mb, + 'min_unit': 1, + 'max_unit': compute_node.memory_mb, + 'step_size': 1, + 'allocation_ratio': compute_node.ram_allocation_ratio, + } + if compute_node.local_gb > 0: + # TODO(johngarbutt) We should either move to reserved_host_disk_gb + # or start tracking DISK_MB. 
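# (Reviewer note, not part of this patch.) convert_mb_to_ceil_gb() rounds up
# to whole gigabytes, so any non-zero reserved_host_disk_mb reserves at least
# 1 DISK_GB here (for example, 512 MB becomes a 1 GB reservation).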
+ reserved_disk_gb = compute_utils.convert_mb_to_ceil_gb( + CONF.reserved_host_disk_mb) + result[DISK_GB] = { + 'total': compute_node.local_gb, + 'reserved': reserved_disk_gb, + 'min_unit': 1, + 'max_unit': compute_node.local_gb, + 'step_size': 1, + 'allocation_ratio': compute_node.disk_allocation_ratio, + } + return result + + +def _instance_to_allocations_dict(instance): + """Given an `objects.Instance` object, return a dict, keyed by resource + class of the amount used by the instance. + + :param instance: `objects.Instance` object to translate + """ + alloc_dict = scheduler_utils.resources_from_flavor(instance, + instance.flavor) + + # Remove any zero allocations. + return {key: val for key, val in alloc_dict.items() if val} + + +def _move_operation_alloc_request(source_allocs, dest_alloc_req): + """Given existing allocations for a source host and a new allocation + request for a destination host, return a new allocation request that + contains resources claimed against both source and destination, accounting + for shared providers. + + Also accounts for a resize to the same host where the source and dest + compute node resource providers are going to be the same. In that case + we sum the resource allocations for the single provider. + + :param source_allocs: Dict, keyed by resource provider UUID, of resources + allocated on the source host + :param dest_alloc_request: The allocation request for resources against the + destination host + """ + LOG.debug("Doubling-up allocation request for move operation.") + # Remove any allocations against resource providers that are + # already allocated against on the source host (like shared storage + # providers) + cur_rp_uuids = set(source_allocs.keys()) + new_rp_uuids = set(a['resource_provider']['uuid'] + for a in dest_alloc_req['allocations']) - cur_rp_uuids + + current_allocs = [ + { + 'resource_provider': { + 'uuid': cur_rp_uuid, + }, + 'resources': alloc['resources'], + } for cur_rp_uuid, alloc in source_allocs.items() + ] + new_alloc_req = {'allocations': current_allocs} + for alloc in dest_alloc_req['allocations']: + if alloc['resource_provider']['uuid'] in new_rp_uuids: + new_alloc_req['allocations'].append(alloc) + elif not new_rp_uuids: + # If there are no new_rp_uuids that means we're resizing to + # the same host so we need to sum the allocations for + # the compute node (and possibly shared providers) using both + # the current and new allocations. + # Note that we sum the allocations rather than take the max per + # resource class between the current and new allocations because + # the compute node/resource tracker is going to adjust for + # decrementing any old allocations as necessary, the scheduler + # shouldn't make assumptions about that. + for current_alloc in current_allocs: + # Find the matching resource provider allocations by UUID. + if (current_alloc['resource_provider']['uuid'] == + alloc['resource_provider']['uuid']): + # Now sum the current allocation resource amounts with + # the new allocation resource amounts. + scheduler_utils.merge_resources(current_alloc['resources'], + alloc['resources']) + + LOG.debug("New allocation request containing both source and " + "destination hosts in move operation: %s", new_alloc_req) + return new_alloc_req + + +def _extract_inventory_in_use(body): + """Given an HTTP response body, extract the resource classes that were + still in use when we tried to delete inventory. + + :returns: String of resource classes or None if there was no InventoryInUse + error in the response body. 
+ """ + match = _RE_INV_IN_USE.search(body) + if match: + return match.group(1) + return None + + +def get_placement_request_id(response): + if response is not None: + return response.headers.get( + 'openstack-request-id', + response.headers.get('x-openstack-request-id')) + + +class SchedulerReportClient(object): + """Client class for updating the scheduler.""" + + def __init__(self): + # A dict, keyed by the resource provider UUID, of ResourceProvider + # objects that will have their inventories and allocations tracked by + # the placement API for the compute host + self._resource_providers = {} + # A dict, keyed by resource provider UUID, of sets of aggregate UUIDs + # the provider is associated with + self._provider_aggregate_map = {} + self._client = self._create_client() + # NOTE(danms): Keep track of how naggy we've been + self._warn_count = 0 + self.ks_filter = {'service_type': 'placement', + 'region_name': CONF.placement.os_region_name, + 'interface': CONF.placement.os_interface} + + @utils.synchronized(PLACEMENT_CLIENT_SEMAPHORE) + def _create_client(self): + """Create the HTTP session accessing the placement service.""" + # Flush _resource_providers and aggregates so we start from a + # clean slate. + self._resource_providers = {} + self._provider_aggregate_map = {} + auth_plugin = keystone.load_auth_from_conf_options( + CONF, 'placement') + return keystone.load_session_from_conf_options( + CONF, 'placement', auth=auth_plugin, + additional_headers={'accept': 'application/json'}) + + def get(self, url, version=None): + kwargs = {} + if version is not None: + # TODO(mriedem): Perform some version discovery at some point. + kwargs = { + 'headers': { + 'OpenStack-API-Version': 'placement %s' % version + }, + } + return self._client.get( + url, + endpoint_filter=self.ks_filter, raise_exc=False, **kwargs) + + def post(self, url, data, version=None): + # NOTE(sdague): using json= instead of data= sets the + # media type to application/json for us. Placement API is + # more sensitive to this than other APIs in the OpenStack + # ecosystem. + kwargs = {} + if version is not None: + # TODO(mriedem): Perform some version discovery at some point. + kwargs = { + 'headers': { + 'OpenStack-API-Version': 'placement %s' % version + }, + } + return self._client.post( + url, json=data, + endpoint_filter=self.ks_filter, raise_exc=False, **kwargs) + + def put(self, url, data, version=None): + # NOTE(sdague): using json= instead of data= sets the + # media type to application/json for us. Placement API is + # more sensitive to this than other APIs in the OpenStack + # ecosystem. + kwargs = {} + if version is not None: + # TODO(mriedem): Perform some version discovery at some point. + kwargs = { + 'headers': { + 'OpenStack-API-Version': 'placement %s' % version + }, + } + if data: + kwargs['json'] = data + return self._client.put( + url, endpoint_filter=self.ks_filter, raise_exc=False, + **kwargs) + + def delete(self, url, version=None): + kwargs = {} + if version is not None: + # TODO(mriedem): Perform some version discovery at some point. + kwargs = { + 'headers': { + 'OpenStack-API-Version': 'placement %s' % version + }, + } + return self._client.delete( + url, + endpoint_filter=self.ks_filter, raise_exc=False, **kwargs) + + @safe_connect + def get_allocation_candidates(self, resources): + """Returns a tuple of (allocation_requests, provider_summaries). 
+ + The allocation requests are a collection of potential JSON objects that + can be passed to the PUT /allocations/{consumer_uuid} Placement REST + API to claim resources against one or more resource providers that meet + the requested resource constraints. + + The provider summaries is a dict, keyed by resource provider UUID, of + inventory and capacity information for any resource provider involved + in the allocation requests. + + :returns: A tuple with a list of allocation request dicts and a dict of + provider information or (None, None) if the request failed + + :param resources: A dict, keyed by resource class name, of requested + amounts of those resources + """ + resource_query = ",".join( + sorted("%s:%s" % (rc, amount) + for (rc, amount) in resources.items())) + qs_params = { + 'resources': resource_query, + } + + url = "/allocation_candidates?%s" % parse.urlencode(qs_params) + resp = self.get(url, version='1.10') + if resp.status_code == 200: + data = resp.json() + return data['allocation_requests'], data['provider_summaries'] + + msg = ("Failed to retrieve allocation candidates from placement API " + "for filters %(resources)s. Got %(status_code)d: %(err_text)s.") + args = { + 'resources': resources, + 'status_code': resp.status_code, + 'err_text': resp.text, + } + LOG.error(msg, args) + return None, None + + @safe_connect + def _get_provider_aggregates(self, rp_uuid): + """Queries the placement API for a resource provider's aggregates. + Returns a set() of aggregate UUIDs or None if no such resource provider + was found or there was an error communicating with the placement API. + + :param rp_uuid: UUID of the resource provider to grab aggregates for. + """ + resp = self.get("/resource_providers/%s/aggregates" % rp_uuid, + version='1.1') + if resp.status_code == 200: + data = resp.json() + return set(data['aggregates']) + + placement_req_id = get_placement_request_id(resp) + if resp.status_code == 404: + msg = _LW("[%(placement_req_id)s] Tried to get a provider's " + "aggregates; however the provider %(uuid)s does not " + "exist.") + args = { + 'uuid': rp_uuid, + 'placement_req_id': placement_req_id, + } + LOG.warning(msg, args) + else: + msg = _LE("[%(placement_req_id)s] Failed to retrieve aggregates " + "from placement API for resource provider with UUID " + "%(uuid)s. Got %(status_code)d: %(err_text)s.") + args = { + 'placement_req_id': placement_req_id, + 'uuid': rp_uuid, + 'status_code': resp.status_code, + 'err_text': resp.text, + } + LOG.error(msg, args) + + @safe_connect + def _get_resource_provider(self, uuid): + """Queries the placement API for a resource provider record with the + supplied UUID. + + Returns a dict of resource provider information if found or None if no + such resource provider could be found. + + :param uuid: UUID identifier for the resource provider to look up + """ + resp = self.get("/resource_providers/%s" % uuid) + if resp.status_code == 200: + data = resp.json() + return data + elif resp.status_code == 404: + return None + else: + placement_req_id = get_placement_request_id(resp) + msg = _LE("[%(placement_req_id)s] Failed to retrieve resource " + "provider record from placement API for UUID %(uuid)s. " + "Got %(status_code)d: %(err_text)s.") + args = { + 'uuid': uuid, + 'status_code': resp.status_code, + 'err_text': resp.text, + 'placement_req_id': placement_req_id, + } + LOG.error(msg, args) + + @safe_connect + def _create_resource_provider(self, uuid, name): + """Calls the placement API to create a new resource provider record. 
+ + Returns a dict of resource provider information object representing + the newly-created resource provider. + + :param uuid: UUID of the new resource provider + :param name: Name of the resource provider + """ + url = "/resource_providers" + payload = { + 'uuid': uuid, + 'name': name, + } + resp = self.post(url, payload) + placement_req_id = get_placement_request_id(resp) + if resp.status_code == 201: + msg = _LI("[%(placement_req_id)s] Created resource provider " + "record via placement API for resource provider with " + "UUID %(uuid)s and name %(name)s.") + args = { + 'uuid': uuid, + 'name': name, + 'placement_req_id': placement_req_id, + } + LOG.info(msg, args) + return dict( + uuid=uuid, + name=name, + generation=0, + ) + elif resp.status_code == 409: + # Another thread concurrently created a resource provider with the + # same UUID. Log a warning and then just return the resource + # provider object from _get_resource_provider() + msg = _LI("[%(placement_req_id)s] Another thread already created " + "a resource provider with the UUID %(uuid)s. Grabbing " + "that record from the placement API.") + args = { + 'uuid': uuid, + 'placement_req_id': placement_req_id, + } + LOG.info(msg, args) + return self._get_resource_provider(uuid) + else: + msg = _LE("[%(placement_req_id)s] Failed to create resource " + "provider record in placement API for UUID %(uuid)s. " + "Got %(status_code)d: %(err_text)s.") + args = { + 'uuid': uuid, + 'status_code': resp.status_code, + 'err_text': resp.text, + 'placement_req_id': placement_req_id, + } + LOG.error(msg, args) + + def _ensure_resource_provider(self, uuid, name=None): + """Ensures that the placement API has a record of a resource provider + with the supplied UUID. If not, creates the resource provider record in + the placement API for the supplied UUID, optionally passing in a name + for the resource provider. + + The found or created resource provider object is returned from this + method. If the resource provider object for the supplied uuid was not + found and the resource provider record could not be created in the + placement API, we return None. + + :param uuid: UUID identifier for the resource provider to ensure exists + :param name: Optional name for the resource provider if the record + does not exist. If empty, the name is set to the UUID + value + """ + if uuid in self._resource_providers: + # NOTE(jaypipes): This isn't optimal to check if aggregate + # associations have changed each time we call + # _ensure_resource_provider() and get a hit on the local cache of + # provider objects, however the alternative is to force operators + # to restart all their nova-compute workers every time they add or + # change an aggregate. We might optionally want to add some sort of + # cache refresh delay or interval as an optimization? 
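# (Reviewer note, not part of this patch.) Even on a cache hit this branch
# still issues one GET /resource_providers/{uuid}/aggregates per call, so
# _ensure_resource_provider() is never entirely free; only the provider
# record itself is served from the local cache.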
+ msg = "Refreshing aggregate associations for resource provider %s" + LOG.debug(msg, uuid) + aggs = self._get_provider_aggregates(uuid) + self._provider_aggregate_map[uuid] = aggs + return self._resource_providers[uuid] + + rp = self._get_resource_provider(uuid) + if rp is None: + name = name or uuid + rp = self._create_resource_provider(uuid, name) + if rp is None: + return + msg = "Grabbing aggregate associations for resource provider %s" + LOG.debug(msg, uuid) + aggs = self._get_provider_aggregates(uuid) + self._resource_providers[uuid] = rp + self._provider_aggregate_map[uuid] = aggs + return rp + + def _get_inventory(self, rp_uuid): + url = '/resource_providers/%s/inventories' % rp_uuid + result = self.get(url) + if not result: + return {'inventories': {}} + return result.json() + + def _get_inventory_and_update_provider_generation(self, rp_uuid): + """Helper method that retrieves the current inventory for the supplied + resource provider according to the placement API. If the cached + generation of the resource provider is not the same as the generation + returned from the placement API, we update the cached generation. + """ + curr = self._get_inventory(rp_uuid) + + # Update our generation immediately, if possible. Even if there + # are no inventories we should always have a generation but let's + # be careful. + server_gen = curr.get('resource_provider_generation') + if server_gen: + my_rp = self._resource_providers[rp_uuid] + if server_gen != my_rp['generation']: + LOG.debug('Updating our resource provider generation ' + 'from %(old)i to %(new)i', + {'old': my_rp['generation'], + 'new': server_gen}) + my_rp['generation'] = server_gen + return curr + + def _update_inventory_attempt(self, rp_uuid, inv_data): + """Update the inventory for this resource provider if needed. + + :param rp_uuid: The resource provider UUID for the operation + :param inv_data: The new inventory for the resource provider + :returns: True if the inventory was updated (or did not need to be), + False otherwise. + """ + curr = self._get_inventory_and_update_provider_generation(rp_uuid) + + # Check to see if we need to update placement's view + if inv_data == curr.get('inventories', {}): + return True + + cur_rp_gen = self._resource_providers[rp_uuid]['generation'] + payload = { + 'resource_provider_generation': cur_rp_gen, + 'inventories': inv_data, + } + url = '/resource_providers/%s/inventories' % rp_uuid + result = self.put(url, payload) + if result.status_code == 409: + LOG.info(_LI('[%(placement_req_id)s] Inventory update conflict ' + 'for %(resource_provider_uuid)s with generation ID ' + '%(generation_id)s'), + {'placement_req_id': get_placement_request_id(result), + 'resource_provider_uuid': rp_uuid, + 'generation_id': cur_rp_gen}) + # NOTE(jaypipes): There may be cases when we try to set a + # provider's inventory that results in attempting to delete an + # inventory record for a resource class that has an active + # allocation. We need to catch this particular case and raise an + # exception here instead of returning False, since we should not + # re-try the operation in this case. + # + # A use case for where this can occur is the following: + # + # 1) Provider created for each Ironic baremetal node in Newton + # 2) Inventory records for baremetal node created for VCPU, + # MEMORY_MB and DISK_GB + # 3) A Nova instance consumes the baremetal node and allocation + # records are created for VCPU, MEMORY_MB and DISK_GB matching + # the total amount of those resource on the baremetal node. 
+ # 3) Upgrade to Ocata and now resource tracker wants to set the + # provider's inventory to a single record of resource class + # CUSTOM_IRON_SILVER (or whatever the Ironic node's + # "resource_class" attribute is) + # 4) Scheduler report client sends the inventory list containing a + # single CUSTOM_IRON_SILVER record and placement service + # attempts to delete the inventory records for VCPU, MEMORY_MB + # and DISK_GB. An exception is raised from the placement service + # because allocation records exist for those resource classes, + # and a 409 Conflict is returned to the compute node. We need to + # trigger a delete of the old allocation records and then set + # the new inventory, and then set the allocation record to the + # new CUSTOM_IRON_SILVER record. + match = _RE_INV_IN_USE.search(result.text) + if match: + rc = match.group(1) + raise exception.InventoryInUse( + resource_classes=rc, + resource_provider=rp_uuid, + ) + + # Invalidate our cache and re-fetch the resource provider + # to be sure to get the latest generation. + del self._resource_providers[rp_uuid] + # NOTE(jaypipes): We don't need to pass a name parameter to + # _ensure_resource_provider() because we know the resource provider + # record already exists. We're just reloading the record here. + self._ensure_resource_provider(rp_uuid) + return False + elif not result: + placement_req_id = get_placement_request_id(result) + LOG.warning(_LW('[%(placement_req_id)s] Failed to update ' + 'inventory for resource provider ' + '%(uuid)s: %(status)i %(text)s'), + {'placement_req_id': placement_req_id, + 'uuid': rp_uuid, + 'status': result.status_code, + 'text': result.text}) + # log the body at debug level + LOG.debug('[%(placement_req_id)s] Failed inventory update request ' + 'for resource provider %(uuid)s with body: %(payload)s', + {'placement_req_id': placement_req_id, + 'uuid': rp_uuid, + 'payload': payload}) + return False + + if result.status_code != 200: + placement_req_id = get_placement_request_id(result) + LOG.info( + _LI('[%(placement_req_id)s] Received unexpected response code ' + '%(code)i while trying to update inventory for resource ' + 'provider %(uuid)s: %(text)s'), + {'placement_req_id': placement_req_id, + 'uuid': rp_uuid, + 'code': result.status_code, + 'text': result.text}) + return False + + # Update our view of the generation for next time + updated_inventories_result = result.json() + new_gen = updated_inventories_result['resource_provider_generation'] + + self._resource_providers[rp_uuid]['generation'] = new_gen + LOG.debug('Updated inventory for %s at generation %i', + rp_uuid, new_gen) + return True + + @safe_connect + def _update_inventory(self, rp_uuid, inv_data): + for attempt in (1, 2, 3): + if rp_uuid not in self._resource_providers: + # NOTE(danms): Either we failed to fetch/create the RP + # on our first attempt, or a previous attempt had to + # invalidate the cache, and we were unable to refresh + # it. Bail and try again next time. + LOG.warning(_LW( + 'Unable to refresh my resource provider record')) + return False + if self._update_inventory_attempt(rp_uuid, inv_data): + return True + time.sleep(1) + return False + + @safe_connect + def _delete_inventory(self, rp_uuid): + """Deletes all inventory records for a resource provider with the + supplied UUID. + + First attempt to DELETE the inventory using microversion 1.5. If + this results in a 406, fail over to a PUT. 
+ """ + curr = self._get_inventory_and_update_provider_generation(rp_uuid) + + # Check to see if we need to update placement's view + if not curr.get('inventories', {}): + msg = "No inventory to delete from resource provider %s." + LOG.debug(msg, rp_uuid) + return + + msg = _LI("Resource provider %s reported no inventory but previous " + "inventory was detected. Deleting existing inventory " + "records.") + LOG.info(msg, rp_uuid) + + url = '/resource_providers/%s/inventories' % rp_uuid + r = self.delete(url, version="1.5") + placement_req_id = get_placement_request_id(r) + cur_rp_gen = self._resource_providers[rp_uuid]['generation'] + msg_args = { + 'rp_uuid': rp_uuid, + 'placement_req_id': placement_req_id, + } + if r.status_code == 406: + # microversion 1.5 not available so try the earlier way + # TODO(cdent): When we're happy that all placement + # servers support microversion 1.5 we can remove this + # call and the associated code. + LOG.debug('Falling back to placement API microversion 1.0 ' + 'for deleting all inventory for a resource provider.') + payload = { + 'resource_provider_generation': cur_rp_gen, + 'inventories': {}, + } + r = self.put(url, payload) + placement_req_id = get_placement_request_id(r) + msg_args['placement_req_id'] = placement_req_id + if r.status_code == 200: + # Update our view of the generation for next time + updated_inv = r.json() + new_gen = updated_inv['resource_provider_generation'] + + self._resource_providers[rp_uuid]['generation'] = new_gen + msg_args['generation'] = new_gen + LOG.info(_LI("[%(placement_req_id)s] Deleted all inventory " + "for resource provider %(rp_uuid)s at generation " + "%(generation)i."), + msg_args) + return + + if r.status_code == 204: + self._resource_providers[rp_uuid]['generation'] = cur_rp_gen + 1 + LOG.info(_LI("[%(placement_req_id)s] Deleted all inventory for " + "resource provider %(rp_uuid)s."), + msg_args) + return + elif r.status_code == 404: + # This can occur if another thread deleted the inventory and the + # resource provider already + LOG.debug("[%(placement_req_id)s] Resource provider %(rp_uuid)s " + "deleted by another thread when trying to delete " + "inventory. Ignoring.", + msg_args) + self._resource_providers.pop(rp_uuid, None) + self._provider_aggregate_map.pop(rp_uuid, None) + return + elif r.status_code == 409: + rc_str = _extract_inventory_in_use(r.text) + if rc_str is not None: + msg = _LW("[%(placement_req_id)s] We cannot delete inventory " + "%(rc_str)s for resource provider %(rp_uuid)s " + "because the inventory is in use.") + msg_args['rc_str'] = rc_str + LOG.warning(msg, msg_args) + return + + msg = _LE("[%(placement_req_id)s] Failed to delete inventory for " + "resource provider %(rp_uuid)s. Got error response: " + "%(err)s.") + msg_args['err'] = r.text + LOG.error(msg, msg_args) + + def set_inventory_for_provider(self, rp_uuid, rp_name, inv_data): + """Given the UUID of a provider, set the inventory records for the + provider to the supplied dict of resources. + + :param rp_uuid: UUID of the resource provider to set inventory for + :param rp_name: Name of the resource provider in case we need to create + a record for it in the placement API + :param inv_data: Dict, keyed by resource class name, of inventory data + to set against the provider + + :raises: exc.InvalidResourceClass if a supplied custom resource class + name does not meet the placement API's format requirements. 
+ """ + self._ensure_resource_provider(rp_uuid, rp_name) + + # Auto-create custom resource classes coming from a virt driver + list(map(self._ensure_resource_class, + (rc_name for rc_name in inv_data + if rc_name not in fields.ResourceClass.STANDARD))) + + if inv_data: + self._update_inventory(rp_uuid, inv_data) + else: + self._delete_inventory(rp_uuid) + + @safe_connect + def _ensure_resource_class(self, name): + """Make sure a custom resource class exists. + + First attempt to PUT the resource class using microversion 1.7. If + this results in a 406, fail over to a GET and POST with version 1.2. + + Returns the name of the resource class if it was successfully + created or already exists. Otherwise None. + + :param name: String name of the resource class to check/create. + :raises: `exception.InvalidResourceClass` upon error. + """ + # no payload on the put request + response = self.put("/resource_classes/%s" % name, None, version="1.7") + if 200 <= response.status_code < 300: + return name + elif response.status_code == 406: + # microversion 1.7 not available so try the earlier way + # TODO(cdent): When we're happy that all placement + # servers support microversion 1.7 we can remove this + # call and the associated code. + LOG.debug('Falling back to placement API microversion 1.2 ' + 'for resource class management.') + return self._get_or_create_resource_class(name) + else: + msg = _LE("Failed to ensure resource class record with " + "placement API for resource class %(rc_name)s. " + "Got %(status_code)d: %(err_text)s.") + args = { + 'rc_name': name, + 'status_code': response.status_code, + 'err_text': response.text, + } + LOG.error(msg, args) + raise exception.InvalidResourceClass(resource_class=name) + + def _get_or_create_resource_class(self, name): + """Queries the placement API for a resource class supplied resource + class string name. If the resource class does not exist, creates it. + + Returns the resource class name if exists or was created, else None. + + :param name: String name of the resource class to check/create. + """ + resp = self.get("/resource_classes/%s" % name, version="1.2") + if 200 <= resp.status_code < 300: + return name + elif resp.status_code == 404: + self._create_resource_class(name) + return name + else: + msg = _LE("Failed to retrieve resource class record from " + "placement API for resource class %(rc_name)s. " + "Got %(status_code)d: %(err_text)s.") + args = { + 'rc_name': name, + 'status_code': resp.status_code, + 'err_text': resp.text, + } + LOG.error(msg, args) + return None + + def _create_resource_class(self, name): + """Calls the placement API to create a new resource class. + + :param name: String name of the resource class to create. + + :returns: None on successful creation. + :raises: `exception.InvalidResourceClass` upon error. + """ + url = "/resource_classes" + payload = { + 'name': name, + } + resp = self.post(url, payload, version="1.2") + if 200 <= resp.status_code < 300: + msg = _LI("Created resource class record via placement API " + "for resource class %s.") + LOG.info(msg, name) + elif resp.status_code == 409: + # Another thread concurrently created a resource class with the + # same name. Log a warning and then just return + msg = _LI("Another thread already created a resource class " + "with the name %s. Returning.") + LOG.info(msg, name) + else: + msg = _LE("Failed to create resource class %(resource_class)s in " + "placement API. 
Got %(status_code)d: %(err_text)s.") + args = { + 'resource_class': name, + 'status_code': resp.status_code, + 'err_text': resp.text, + } + LOG.error(msg, args) + raise exception.InvalidResourceClass(resource_class=name) + + def update_compute_node(self, compute_node): + """Creates or updates stats for the supplied compute node. + + :param compute_node: updated nova.objects.ComputeNode to report + :raises `exception.InventoryInUse` if the compute node has had changes + to its inventory but there are still active allocations for + resource classes that would be deleted by an update to the + placement API. + """ + self._ensure_resource_provider(compute_node.uuid, + compute_node.hypervisor_hostname) + inv_data = _compute_node_to_inventory_dict(compute_node) + if inv_data: + self._update_inventory(compute_node.uuid, inv_data) + else: + self._delete_inventory(compute_node.uuid) + + @safe_connect + def get_allocations_for_instance(self, rp_uuid, instance): + url = '/allocations/%s' % instance.uuid + resp = self.get(url) + if not resp: + return {} + else: + # NOTE(cdent): This trims to just the allocations being + # used on this resource provider. In the future when there + # are shared resources there might be other providers. + return resp.json()['allocations'].get( + rp_uuid, {}).get('resources', {}) + + def _allocate_for_instance(self, rp_uuid, instance): + my_allocations = _instance_to_allocations_dict(instance) + current_allocations = self.get_allocations_for_instance(rp_uuid, + instance) + if current_allocations == my_allocations: + allocstr = ','.join(['%s=%s' % (k, v) + for k, v in my_allocations.items()]) + LOG.debug('Instance %(uuid)s allocations are unchanged: %(alloc)s', + {'uuid': instance.uuid, 'alloc': allocstr}) + return + + LOG.debug('Sending allocation for instance %s', + my_allocations, + instance=instance) + res = self.put_allocations(rp_uuid, instance.uuid, my_allocations, + instance.project_id, instance.user_id) + if res: + LOG.info(_LI('Submitted allocation for instance'), + instance=instance) + + # NOTE(jaypipes): Currently, this method is ONLY used in two places: + # 1. By the scheduler to allocate resources on the selected destination + # hosts. + # 2. By the conductor LiveMigrationTask to allocate resources on a forced + # destination host. This is a short-term fix for Pike which should be + # replaced in Queens by conductor calling the scheduler in the force + # host case. + # This method should not be called by the resource tracker; instead, the + # _allocate_for_instance() method is used which does not perform any + # checking that a move operation is in place. + @safe_connect + def claim_resources(self, consumer_uuid, alloc_request, project_id, + user_id, attempt=0): + """Creates allocation records for the supplied instance UUID against + the supplied resource providers. + + We check to see if resources have already been claimed for this + consumer. If so, we assume that a move operation is underway and the + scheduler is attempting to claim resources against the new (destination + host). In order to prevent compute nodes currently performing move + operations from being scheduled to improperly, we create a "doubled-up" + allocation that consumes resources on *both* the source and the + destination host during the move operation. When the move operation + completes, the destination host (via _allocate_for_instance()) will + end up setting allocations for the instance only on the destination + host thereby freeing up resources on the source host appropriately. 
+ + :note: This method will attempt to retry a claim that fails with a + concurrent update up to 3 times + + :param consumer_uuid: The instance's UUID. + :param alloc_request: The JSON body of the request to make to the + placement's PUT /allocations API + :param project_id: The project_id associated with the allocations. + :param user_id: The user_id associated with the allocations. + :param attempt: The attempt at claiming this allocation request (used + in recursive retries) + :returns: True if the allocations were created, False otherwise. + """ + # Ensure we don't change the supplied alloc request since it's used in + # a loop within the scheduler against multiple instance claims + ar = copy.deepcopy(alloc_request) + url = '/allocations/%s' % consumer_uuid + + payload = ar + + # We first need to determine if this is a move operation and if so + # create the "doubled-up" allocation that exists for the duration of + # the move operation against both the source and destination hosts + r = self.get(url) + if r.status_code == 200: + current_allocs = r.json()['allocations'] + if current_allocs: + payload = _move_operation_alloc_request(current_allocs, ar) + + payload['project_id'] = project_id + payload['user_id'] = user_id + r = self.put(url, payload, version='1.10') + if r.status_code != 204: + # NOTE(jaypipes): Yes, it sucks doing string comparison like this + # but we have no error codes, only error messages. + if attempt < 3 and 'concurrently updated' in r.text: + # Another thread updated one or more of the resource providers + # involved in the claim. It's safe to retry the claim + # transaction. + LOG.debug("Another process changed the resource providers " + "involved in our claim attempt. Retrying claim.") + return self.claim_resources(consumer_uuid, alloc_request, + project_id, user_id, attempt=(attempt + 1)) + LOG.warning( + 'Unable to submit allocation for instance ' + '%(uuid)s (%(code)i %(text)s)', + {'uuid': consumer_uuid, + 'code': r.status_code, + 'text': r.text}) + return r.status_code == 204 + + @safe_connect + def remove_provider_from_instance_allocation(self, consumer_uuid, rp_uuid, + user_id, project_id, + resources): + """Grabs an allocation for a particular consumer UUID, strips parts of + the allocation that refer to a supplied resource provider UUID, and + then PUTs the resulting allocation back to the placement API for the + consumer. + + This is used to reconcile the "doubled-up" allocation that the + scheduler constructs when claiming resources against the destination + host during a move operation. + + If the move was between hosts, the entire allocation for rp_uuid will + be dropped. If the move is a resize on the same host, then we will + subtract resources from the single allocation to ensure we do not + exceed the reserved or max_unit amounts for the resource on the host. + + :param consumer_uuid: The instance/consumer UUID + :param rp_uuid: The UUID of the provider whose resources we wish to + remove from the consumer's allocation + :param user_id: The instance's user + :param project_id: The instance's project + :param resources: The resources to be dropped from the allocation + """ + url = '/allocations/%s' % consumer_uuid + + # Grab the "doubled-up" allocation that we will manipulate + r = self.get(url) + if r.status_code != 200: + LOG.warning("Failed to retrieve allocations for %s. 
Got HTTP %s", + consumer_uuid, r.status_code) + return False + + current_allocs = r.json()['allocations'] + if not current_allocs: + LOG.error("Expected to find current allocations for %s, but " + "found none.", consumer_uuid) + return False + + # If the host isn't in the current allocation for the instance, don't + # do anything + if rp_uuid not in current_allocs: + LOG.warning("Expected to find allocations referencing resource " + "provider %s for %s, but found none.", + rp_uuid, consumer_uuid) + return True + + compute_providers = [uuid for uuid, alloc in current_allocs.items() + if 'VCPU' in alloc['resources']] + LOG.debug('Current allocations for instance: %s', current_allocs, + instance_uuid=consumer_uuid) + LOG.debug('Instance %s has resources on %i compute nodes', + consumer_uuid, len(compute_providers)) + + new_allocs = [ + { + 'resource_provider': { + 'uuid': alloc_rp_uuid, + }, + 'resources': alloc['resources'], + } + for alloc_rp_uuid, alloc in current_allocs.items() + if alloc_rp_uuid != rp_uuid + ] + + if len(compute_providers) == 1: + # NOTE(danms): We are in a resize to same host scenario. Since we + # are the only provider then we need to merge back in the doubled + # allocation with our part subtracted + peer_alloc = { + 'resource_provider': { + 'uuid': rp_uuid, + }, + 'resources': current_allocs[rp_uuid]['resources'] + } + LOG.debug('Original resources from same-host ' + 'allocation: %s', peer_alloc['resources']) + scheduler_utils.merge_resources(peer_alloc['resources'], + resources, -1) + LOG.debug('Subtracting old resources from same-host ' + 'allocation: %s', peer_alloc['resources']) + new_allocs.append(peer_alloc) + + payload = {'allocations': new_allocs} + payload['project_id'] = project_id + payload['user_id'] = user_id + LOG.debug("Sending updated allocation %s for instance %s after " + "removing resources for %s.", + new_allocs, consumer_uuid, rp_uuid) + r = self.put(url, payload, version='1.10') + if r.status_code != 204: + LOG.warning("Failed to save allocation for %s. Got HTTP %s: %s", + consumer_uuid, r.status_code, r.text) + return r.status_code == 204 + + @safe_connect + def put_allocations(self, rp_uuid, consumer_uuid, alloc_data, project_id, + user_id): + """Creates allocation records for the supplied instance UUID against + the supplied resource provider. + + :note Currently we only allocate against a single resource provider. + Once shared storage and things like NUMA allocations are a + reality, this will change to allocate against multiple providers. + + :param rp_uuid: The UUID of the resource provider to allocate against. + :param consumer_uuid: The instance's UUID. + :param alloc_data: Dict, keyed by resource class, of amounts to + consume. + :param project_id: The project_id associated with the allocations. + :param user_id: The user_id associated with the allocations. + :returns: True if the allocations were created, False otherwise. + """ + payload = { + 'allocations': [ + { + 'resource_provider': { + 'uuid': rp_uuid, + }, + 'resources': alloc_data, + }, + ], + 'project_id': project_id, + 'user_id': user_id, + } + url = '/allocations/%s' % consumer_uuid + r = self.put(url, payload, version='1.8') + if r.status_code == 406: + # microversion 1.8 not available so try the earlier way + # TODO(melwitt): Remove this when we can be sure all placement + # servers support version 1.8. 
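# (Reviewer note, not part of this patch.) Placement microversion 1.8 is
# what introduced project_id/user_id in the allocation payload; on a 406
# those two keys are stripped below and the PUT is retried without a
# version header so older placement services still accept the request.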
+ payload.pop('project_id') + payload.pop('user_id') + r = self.put(url, payload) + if r.status_code != 204: + LOG.warning( + 'Unable to submit allocation for instance ' + '%(uuid)s (%(code)i %(text)s)', + {'uuid': consumer_uuid, + 'code': r.status_code, + 'text': r.text}) + return r.status_code == 204 + + @safe_connect + def delete_allocation_for_instance(self, uuid): + url = '/allocations/%s' % uuid + r = self.delete(url) + if r: + LOG.info(_LI('Deleted allocation for instance %s'), + uuid) + else: + # Check for 404 since we don't need to log a warning if we tried to + # delete something which doesn't actually exist. + if r.status_code != 404: + LOG.warning( + _LW('Unable to delete allocation for instance ' + '%(uuid)s: (%(code)i %(text)s)'), + {'uuid': uuid, + 'code': r.status_code, + 'text': r.text}) + + def update_instance_allocation(self, compute_node, instance, sign): + if sign > 0: + self._allocate_for_instance(compute_node.uuid, instance) + else: + self.delete_allocation_for_instance(instance.uuid) + + @safe_connect + def get_allocations_for_resource_provider(self, rp_uuid): + url = '/resource_providers/%s/allocations' % rp_uuid + resp = self.get(url) + if not resp: + return {} + else: + return resp.json()['allocations'] + + @safe_connect + def delete_resource_provider(self, context, compute_node, cascade=False): + """Deletes the ResourceProvider record for the compute_node. + + :param context: The security context + :param compute_node: The nova.objects.ComputeNode object that is the + resource provider being deleted. + :param cascade: Boolean value that, when True, will first delete any + associated Allocation and Inventory records for the + compute node + """ + nodename = compute_node.hypervisor_hostname + host = compute_node.host + rp_uuid = compute_node.uuid + if cascade: + # Delete any allocations for this resource provider. + # Since allocations are by consumer, we get the consumers on this + # host, which are its instances. + instances = objects.InstanceList.get_by_host_and_node(context, + host, nodename) + for instance in instances: + self.delete_allocation_for_instance(instance.uuid) + url = "/resource_providers/%s" % rp_uuid + resp = self.delete(url) + if resp: + LOG.info(_LI("Deleted resource provider %s"), rp_uuid) + # clean the caches + self._resource_providers.pop(rp_uuid, None) + self._provider_aggregate_map.pop(rp_uuid, None) + else: + # Check for 404 since we don't need to log a warning if we tried to + # delete something which doesn"t actually exist. + if resp.status_code != 404: + LOG.warning( + _LW("Unable to delete resource provider " + "%(uuid)s: (%(code)i %(text)s)"), + {"uuid": rp_uuid, + "code": resp.status_code, + "text": resp.text}) diff --git a/nova/scheduler/filter_scheduler.py b/nova/scheduler/filter_scheduler.py new file mode 100644 index 00000000..e67dbeef --- /dev/null +++ b/nova/scheduler/filter_scheduler.py @@ -0,0 +1,337 @@ +# Copyright (c) 2011 OpenStack Foundation +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
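Reviewer note (not part of the patch): a hypothetical end-to-end sketch of the allocation calls defined above; the UUIDs, IDs, and resource amounts are placeholders.

# Hypothetical usage sketch for SchedulerReportClient allocations above.
from nova.scheduler.client import report

client = report.SchedulerReportClient()

# Write an allocation of 2 VCPU / 2048 MB RAM / 20 GB disk for one
# instance against a single compute node resource provider. On placement
# servers older than microversion 1.8 the client retries without the
# project_id/user_id keys, as shown in put_allocations() above.
client.put_allocations(
    'compute-node-rp-uuid', 'instance-uuid',
    {'VCPU': 2, 'MEMORY_MB': 2048, 'DISK_GB': 20},
    'a-project-id', 'a-user-id')

# Remove the allocation again, e.g. when the instance is deleted.
client.delete_allocation_for_instance('instance-uuid')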
+ +""" +The FilterScheduler is for creating instances locally. +You can customize this scheduler by specifying your own Host Filters and +Weighing Functions. +""" + +import random + +from oslo_log import log as logging +from six.moves import range + +import nova.conf +from nova import exception +from nova.i18n import _ +from nova import rpc +from nova.scheduler import client +from nova.scheduler import driver + +CONF = nova.conf.CONF +LOG = logging.getLogger(__name__) + + +class FilterScheduler(driver.Scheduler): + """Scheduler that can be used for filtering and weighing.""" + def __init__(self, *args, **kwargs): + super(FilterScheduler, self).__init__(*args, **kwargs) + self.notifier = rpc.get_notifier('scheduler') + scheduler_client = client.SchedulerClient() + self.placement_client = scheduler_client.reportclient + + def select_destinations(self, context, spec_obj, instance_uuids, + alloc_reqs_by_rp_uuid, provider_summaries): + """Returns a sorted list of HostState objects that satisfy the + supplied request_spec. + + These hosts will have already had their resources claimed in Placement. + + :param context: The RequestContext object + :param spec_obj: The RequestSpec object + :param instance_uuids: List of UUIDs, one for each value of the spec + object's num_instances attribute + :param alloc_reqs_by_rp_uuid: Optional dict, keyed by resource provider + UUID, of the allocation requests that may + be used to claim resources against + matched hosts. If None, indicates either + the placement API wasn't reachable or + that there were no allocation requests + returned by the placement API. If the + latter, the provider_summaries will be an + empty dict, not None. + :param provider_summaries: Optional dict, keyed by resource provider + UUID, of information that will be used by + the filters/weighers in selecting matching + hosts for a request. If None, indicates that + the scheduler driver should grab all compute + node information locally and that the + Placement API is not used. If an empty dict, + indicates the Placement API returned no + potential matches for the requested + resources. + """ + self.notifier.info( + context, 'scheduler.select_destinations.start', + dict(request_spec=spec_obj.to_legacy_request_spec_dict())) + + num_instances = spec_obj.num_instances + selected_hosts = self._schedule(context, spec_obj, instance_uuids, + alloc_reqs_by_rp_uuid, provider_summaries) + + # Couldn't fulfill the request_spec + if len(selected_hosts) < num_instances: + # NOTE(Rui Chen): If multiple creates failed, set the updated time + # of selected HostState to None so that these HostStates are + # refreshed according to database in next schedule, and release + # the resource consumed by instance in the process of selecting + # host. + for host in selected_hosts: + host.updated = None + + # Log the details but don't put those into the reason since + # we don't want to give away too much information about our + # actual environment. 
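# (Reviewer note, not part of this patch.) The NoValidHost reason returned
# to the API user below stays deliberately generic; the host and instance
# counts are only emitted in this debug log entry.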
+ LOG.debug('There are %(hosts)d hosts available but ' + '%(num_instances)d instances requested to build.', + {'hosts': len(selected_hosts), + 'num_instances': num_instances}) + + reason = _('There are not enough hosts available.') + raise exception.NoValidHost(reason=reason) + + self.notifier.info( + context, 'scheduler.select_destinations.end', + dict(request_spec=spec_obj.to_legacy_request_spec_dict())) + return selected_hosts + + def _schedule(self, context, spec_obj, instance_uuids, + alloc_reqs_by_rp_uuid, provider_summaries): + """Returns a list of hosts that meet the required specs, ordered by + their fitness. + + These hosts will have already had their resources claimed in Placement. + + :param context: The RequestContext object + :param spec_obj: The RequestSpec object + :param instance_uuids: List of instance UUIDs to place or move. + :param alloc_reqs_by_rp_uuid: Optional dict, keyed by resource provider + UUID, of the allocation requests that may + be used to claim resources against + matched hosts. If None, indicates either + the placement API wasn't reachable or + that there were no allocation requests + returned by the placement API. If the + latter, the provider_summaries will be an + empty dict, not None. + :param provider_summaries: Optional dict, keyed by resource provider + UUID, of information that will be used by + the filters/weighers in selecting matching + hosts for a request. If None, indicates that + the scheduler driver should grab all compute + node information locally and that the + Placement API is not used. If an empty dict, + indicates the Placement API returned no + potential matches for the requested + resources. + """ + elevated = context.elevated() + + # Find our local list of acceptable hosts by repeatedly + # filtering and weighing our options. Each time we choose a + # host, we virtually consume resources on it so subsequent + # selections can adjust accordingly. + + # Note: remember, we are using an iterator here. So only + # traverse this list once. This can bite you if the hosts + # are being scanned in a filter or weighing function. + hosts = self._get_all_host_states(elevated, spec_obj, + provider_summaries) + + # A list of the instance UUIDs that were successfully claimed against + # in the placement API. If we are not able to successfully claim for + # all involved instances, we use this list to remove those allocations + # before returning + claimed_instance_uuids = [] + + selected_hosts = [] + + # NOTE(sbauza): The RequestSpec.num_instances field contains the number + # of instances created when the RequestSpec was used to first boot some + # instances. This is incorrect when doing a move or resize operation, + # so prefer the length of instance_uuids unless it is None. + num_instances = (len(instance_uuids) if instance_uuids + else spec_obj.num_instances) + for num in range(num_instances): + hosts = self._get_sorted_hosts(spec_obj, hosts, num) + if not hosts: + # NOTE(jaypipes): If we get here, that means not all instances + # in instance_uuids were able to be matched to a selected host. + # So, let's clean up any already-claimed allocations here + # before breaking and returning + self._cleanup_allocations(claimed_instance_uuids) + break + + if (instance_uuids is None or + not self.USES_ALLOCATION_CANDIDATES or + alloc_reqs_by_rp_uuid is None): + # Unfortunately, we still need to deal with older conductors + # that may not be passing in a list of instance_uuids. 
In those + # cases, obviously we can't claim resources because we don't + # have instance UUIDs to claim with, so we just grab the first + # host in the list of sorted hosts. In addition to older + # conductors, we need to support the caching scheduler, which + # doesn't use the placement API (and has + # USES_ALLOCATION_CANDIDATE = False) and therefore we skip all + # the claiming logic for that scheduler driver. Finally, if + # there was a problem communicating with the placement API, + # alloc_reqs_by_rp_uuid will be None, so we skip claiming in + # that case as well + claimed_host = hosts[0] + else: + instance_uuid = instance_uuids[num] + + # Attempt to claim the resources against one or more resource + # providers, looping over the sorted list of possible hosts + # looking for an allocation request that contains that host's + # resource provider UUID + claimed_host = None + for host in hosts: + cn_uuid = host.uuid + if cn_uuid not in alloc_reqs_by_rp_uuid: + LOG.debug("Found host state %s that wasn't in " + "allocation requests. Skipping.", cn_uuid) + continue + + alloc_reqs = alloc_reqs_by_rp_uuid[cn_uuid] + if self._claim_resources(elevated, spec_obj, instance_uuid, + alloc_reqs): + claimed_host = host + break + + if claimed_host is None: + # We weren't able to claim resources in the placement API + # for any of the sorted hosts identified. So, clean up any + # successfully-claimed resources for prior instances in + # this request and return an empty list which will cause + # select_destinations() to raise NoValidHost + LOG.debug("Unable to successfully claim against any host.") + self._cleanup_allocations(claimed_instance_uuids) + return [] + + claimed_instance_uuids.append(instance_uuid) + + LOG.debug("Selected host: %(host)s", {'host': claimed_host}) + selected_hosts.append(claimed_host) + + # Now consume the resources so the filter/weights will change for + # the next instance. + claimed_host.consume_from_request(spec_obj) + if spec_obj.instance_group is not None: + spec_obj.instance_group.hosts.append(claimed_host.host) + # hosts has to be not part of the updates when saving + spec_obj.instance_group.obj_reset_changes(['hosts']) + return selected_hosts + + def _cleanup_allocations(self, instance_uuids): + """Removes allocations for the supplied instance UUIDs.""" + if not instance_uuids: + return + LOG.debug("Cleaning up allocations for %s", instance_uuids) + for uuid in instance_uuids: + self.placement_client.delete_allocation_for_instance(uuid) + + def _claim_resources(self, ctx, spec_obj, instance_uuid, alloc_reqs): + """Given an instance UUID (representing the consumer of resources), the + HostState object for the host that was chosen for the instance, and a + list of allocation request JSON objects, attempt to claim resources for + the instance in the placement API. Returns True if the claim process + was successful, False otherwise. + + :param ctx: The RequestContext object + :param spec_obj: The RequestSpec object + :param instance_uuid: The UUID of the consuming instance + :param cn_uuid: UUID of the host to allocate against + :param alloc_reqs: A list of allocation request JSON objects that + allocate against (at least) the compute host + selected by the _schedule() method. These allocation + requests were constructed from a call to the GET + /allocation_candidates placement API call. 
Each + allocation_request satisfies the original request + for resources and can be supplied as-is (along with + the project and user ID to the placement API's + PUT /allocations/{consumer_uuid} call to claim + resources for the instance + """ + LOG.debug("Attempting to claim resources in the placement API for " + "instance %s", instance_uuid) + + project_id = spec_obj.project_id + + # NOTE(jaypipes): So, the RequestSpec doesn't store the user_id, + # only the project_id, so we need to grab the user information from + # the context. Perhaps we should consider putting the user ID in + # the spec object? + user_id = ctx.user_id + + # TODO(jaypipes): Loop through all allocation requests instead of just + # trying the first one. For now, since we'll likely want to order the + # allocation requests in the future based on information in the + # provider summaries, we'll just try to claim resources using the first + # allocation request + alloc_req = alloc_reqs[0] + + return self.placement_client.claim_resources(instance_uuid, + alloc_req, project_id, user_id) + + def _get_sorted_hosts(self, spec_obj, host_states, index): + """Returns a list of HostState objects that match the required + scheduling constraints for the request spec object and have been sorted + according to the weighers. + """ + filtered_hosts = self.host_manager.get_filtered_hosts(host_states, + spec_obj, index) + + LOG.debug("Filtered %(hosts)s", {'hosts': filtered_hosts}) + + if not filtered_hosts: + return [] + + weighed_hosts = self.host_manager.get_weighed_hosts(filtered_hosts, + spec_obj) + # Strip off the WeighedHost wrapper class... + weighed_hosts = [h.obj for h in weighed_hosts] + + LOG.debug("Weighed %(hosts)s", {'hosts': weighed_hosts}) + + # We randomize the first element in the returned list to alleviate + # congestion where the same host is consistently selected among + # numerous potential hosts for similar request specs. + host_subset_size = CONF.filter_scheduler.host_subset_size + if host_subset_size < len(weighed_hosts): + weighed_subset = weighed_hosts[0:host_subset_size] + else: + weighed_subset = weighed_hosts + chosen_host = random.choice(weighed_subset) + weighed_hosts.remove(chosen_host) + return [chosen_host] + weighed_hosts + + def _get_all_host_states(self, context, spec_obj, provider_summaries): + """Template method, so a subclass can implement caching.""" + # NOTE(jaypipes): provider_summaries being None is treated differently + # from an empty dict. provider_summaries is None when we want to grab + # all compute nodes, for instance when using the caching scheduler. + # The provider_summaries variable will be an empty dict when the + # Placement API found no providers that match the requested + # constraints, which in turn makes compute_uuids an empty list and + # get_host_states_by_uuids will return an empty tuple also, which will + # eventually result in a NoValidHost error. + compute_uuids = None + if provider_summaries is not None: + compute_uuids = list(provider_summaries.keys()) + return self.host_manager.get_host_states_by_uuids(context, + compute_uuids, + spec_obj) diff --git a/nova/virt/configdrive.py b/nova/virt/configdrive.py new file mode 100644 index 00000000..1eb77cef --- /dev/null +++ b/nova/virt/configdrive.py @@ -0,0 +1,178 @@ +# Copyright 2012 Michael Still and Canonical Inc +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Config Drive v2 helper.""" + +import os +import shutil + +from oslo_utils import fileutils +from oslo_utils import units +import six + +import nova.conf +from nova import exception +from nova.objects import fields +from nova import utils +from nova import version + +CONF = nova.conf.CONF + +# Config drives are 64mb, if we can't size to the exact size of the data +CONFIGDRIVESIZE_BYTES = 64 * units.Mi + + +class ConfigDriveBuilder(object): + """Build config drives, optionally as a context manager.""" + + def __init__(self, instance_md=None): + self.imagefile = None + self.mdfiles = [] + + if instance_md is not None: + self.add_instance_metadata(instance_md) + + def __enter__(self): + return self + + def __exit__(self, exctype, excval, exctb): + if exctype is not None: + # NOTE(mikal): this means we're being cleaned up because an + # exception was thrown. All bets are off now, and we should not + # swallow the exception + return False + self.cleanup() + + def _add_file(self, basedir, path, data): + filepath = os.path.join(basedir, path) + dirname = os.path.dirname(filepath) + fileutils.ensure_tree(dirname) + with open(filepath, 'wb') as f: + # the given data can be either text or bytes. we can only write + # bytes into files. + if isinstance(data, six.text_type): + data = data.encode('utf-8') + f.write(data) + + def add_instance_metadata(self, instance_md): + for (path, data) in instance_md.metadata_for_config_drive(): + self.mdfiles.append((path, data)) + + def _write_md_files(self, basedir): + for data in self.mdfiles: + self._add_file(basedir, data[0], data[1]) + + def _make_iso9660(self, path, tmpdir): + publisher = "%(product)s %(version)s" % { + 'product': version.product_string(), + 'version': version.version_string_with_package() + } + + utils.execute(CONF.mkisofs_cmd, + '-o', path, + '-ldots', + '-allow-lowercase', + '-allow-multidot', + '-l', + '-publisher', + publisher, + '-quiet', + '-J', + '-r', + '-V', 'config-2', + tmpdir, + attempts=1, + run_as_root=False) + + def _make_vfat(self, path, tmpdir): + # NOTE(mikal): This is a little horrible, but I couldn't find an + # equivalent to genisoimage for vfat filesystems. + with open(path, 'wb') as f: + f.truncate(CONFIGDRIVESIZE_BYTES) + + utils.mkfs('vfat', path, label='config-2') + + with utils.tempdir() as mountdir: + mounted = False + try: + _, err = utils.trycmd( + 'mount', '-o', 'loop,uid=%d,gid=%d' % (os.getuid(), + os.getgid()), + path, + mountdir, + run_as_root=True) + if err: + raise exception.ConfigDriveMountFailed(operation='mount', + error=err) + mounted = True + + # NOTE(mikal): I can't just use shutils.copytree here, + # because the destination directory already + # exists. This is annoying. + for ent in os.listdir(tmpdir): + shutil.copytree(os.path.join(tmpdir, ent), + os.path.join(mountdir, ent)) + + finally: + if mounted: + utils.execute('umount', mountdir, run_as_root=True) + + def make_drive(self, path): + """Make the config drive. + + :param path: the path to place the config drive image at + + :raises ProcessExecuteError if a helper process has failed. 
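+
+        A minimal usage sketch (illustrative only; the path and the
+        ``inst_md`` name are invented, ``inst_md`` standing in for an
+        InstanceMetadata object)::
+
+            with ConfigDriveBuilder(instance_md=inst_md) as cdb:
+                cdb.make_drive('/tmp/disk.config')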
+ """ + with utils.tempdir() as tmpdir: + self._write_md_files(tmpdir) + + if CONF.config_drive_format == 'iso9660': + self._make_iso9660(path, tmpdir) + elif CONF.config_drive_format == 'vfat': + self._make_vfat(path, tmpdir) + else: + raise exception.ConfigDriveUnknownFormat( + format=CONF.config_drive_format) + + def cleanup(self): + if self.imagefile: + fileutils.delete_if_exists(self.imagefile) + + def __repr__(self): + return "" + + +def required_by(instance): + + image_prop = instance.image_meta.properties.get( + "img_config_drive", + fields.ConfigDrivePolicy.OPTIONAL) + + return (instance.config_drive or + CONF.force_config_drive or + image_prop == fields.ConfigDrivePolicy.MANDATORY + ) + + +def update_instance(instance): + """Update the instance config_drive setting if necessary + + The image or configuration file settings may override the default instance + setting. In this case the instance needs to mirror the actual + virtual machine configuration. + """ + if not instance.config_drive and required_by(instance): + instance.config_drive = True diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py new file mode 100644 index 00000000..edee9697 --- /dev/null +++ b/nova/virt/ironic/driver.py @@ -0,0 +1,1798 @@ +# Copyright 2014 Red Hat, Inc. +# Copyright 2013 Hewlett-Packard Development Company, L.P. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +A driver wrapping the Ironic API, such that Nova may provision +bare metal resources. 
+""" +import base64 +import gzip +import shutil +import tempfile +import time + +from oslo_log import log as logging +from oslo_serialization import jsonutils +from oslo_service import loopingcall +from oslo_utils import excutils +from oslo_utils import importutils +import six +import six.moves.urllib.parse as urlparse +from tooz import hashring as hash_ring + +from nova.api.metadata import base as instance_metadata +from nova import block_device +from nova.compute import power_state +from nova.compute import task_states +from nova.compute import vm_states +import nova.conf +from nova.console import type as console_type +from nova import context as nova_context +from nova import exception +from nova.i18n import _ +from nova import objects +from nova.objects import fields as obj_fields +from nova import servicegroup +from nova import utils +from nova.virt import configdrive +from nova.virt import driver as virt_driver +from nova.virt import firewall +from nova.virt import hardware +from nova.virt.ironic import client_wrapper +from nova.virt.ironic import ironic_states +from nova.virt.ironic import patcher +from nova.virt import netutils + + +ironic = None + +LOG = logging.getLogger(__name__) + + +CONF = nova.conf.CONF + +_POWER_STATE_MAP = { + ironic_states.POWER_ON: power_state.RUNNING, + ironic_states.NOSTATE: power_state.NOSTATE, + ironic_states.POWER_OFF: power_state.SHUTDOWN, +} + +_UNPROVISION_STATES = (ironic_states.ACTIVE, ironic_states.DEPLOYFAIL, + ironic_states.ERROR, ironic_states.DEPLOYWAIT, + ironic_states.DEPLOYING) + +_NODE_FIELDS = ('uuid', 'power_state', 'target_power_state', 'provision_state', + 'target_provision_state', 'last_error', 'maintenance', + 'properties', 'instance_uuid') + +# Console state checking interval in seconds +_CONSOLE_STATE_CHECKING_INTERVAL = 1 + +# Number of hash ring partitions per service +# 5 should be fine for most deployments, as an experimental feature. 
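+# (Editor's note: 2 ** 5 == 32 partitions; the "5" in the comment above
+# refers to this exponent, not to the number of partitions.)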
+_HASH_RING_PARTITIONS = 2 ** 5 + + +def map_power_state(state): + try: + return _POWER_STATE_MAP[state] + except KeyError: + LOG.warning("Power state %s not found.", state) + return power_state.NOSTATE + + +def _get_nodes_supported_instances(cpu_arch=None): + """Return supported instances for a node.""" + if not cpu_arch: + return [] + return [(cpu_arch, + obj_fields.HVType.BAREMETAL, + obj_fields.VMMode.HVM)] + + +def _log_ironic_polling(what, node, instance): + power_state = (None if node.power_state is None else + '"%s"' % node.power_state) + tgt_power_state = (None if node.target_power_state is None else + '"%s"' % node.target_power_state) + prov_state = (None if node.provision_state is None else + '"%s"' % node.provision_state) + tgt_prov_state = (None if node.target_provision_state is None else + '"%s"' % node.target_provision_state) + LOG.debug('Still waiting for ironic node %(node)s to %(what)s: ' + 'power_state=%(power_state)s, ' + 'target_power_state=%(tgt_power_state)s, ' + 'provision_state=%(prov_state)s, ' + 'target_provision_state=%(tgt_prov_state)s', + dict(what=what, + node=node.uuid, + power_state=power_state, + tgt_power_state=tgt_power_state, + prov_state=prov_state, + tgt_prov_state=tgt_prov_state), + instance=instance) + + +class IronicDriver(virt_driver.ComputeDriver): + """Hypervisor driver for Ironic - bare metal provisioning.""" + + capabilities = {"has_imagecache": False, + "supports_recreate": False, + "supports_migrate_to_same_host": False, + "supports_attach_interface": True + } + + def __init__(self, virtapi, read_only=False): + super(IronicDriver, self).__init__(virtapi) + global ironic + if ironic is None: + ironic = importutils.import_module('ironicclient') + # NOTE(deva): work around a lack of symbols in the current version. + if not hasattr(ironic, 'exc'): + ironic.exc = importutils.import_module('ironicclient.exc') + if not hasattr(ironic, 'client'): + ironic.client = importutils.import_module( + 'ironicclient.client') + + self.firewall_driver = firewall.load_driver( + default='nova.virt.firewall.NoopFirewallDriver') + self.node_cache = {} + self.node_cache_time = 0 + self.servicegroup_api = servicegroup.API() + + self.ironicclient = client_wrapper.IronicClientWrapper() + + # This is needed for the instance flavor migration in Pike, and should + # be removed in Queens. Since this will run several times in the life + # of the driver, track the instances that have already been migrated. + self._migrated_instance_uuids = set() + + def _get_node(self, node_uuid): + """Get a node by its UUID. + + Some methods pass in variables named nodename, but are + actually UUID's. + """ + return self.ironicclient.call('node.get', node_uuid, + fields=_NODE_FIELDS) + + def _validate_instance_and_node(self, instance): + """Get the node associated with the instance. + + Check with the Ironic service that this instance is associated with a + node, and return the node. + """ + try: + return self.ironicclient.call('node.get_by_instance_uuid', + instance.uuid, fields=_NODE_FIELDS) + except ironic.exc.NotFound: + raise exception.InstanceNotFound(instance_id=instance.uuid) + + def _node_resources_unavailable(self, node_obj): + """Determine whether the node's resources are in an acceptable state. + + Determines whether the node's resources should be presented + to Nova for use based on the current power, provision and maintenance + state. 
This is called after _node_resources_used, so any node that + is not used and not in AVAILABLE should be considered in a 'bad' state, + and unavailable for scheduling. Returns True if unacceptable. + """ + bad_power_states = [ + ironic_states.ERROR, ironic_states.NOSTATE] + # keep NOSTATE around for compatibility + good_provision_states = [ + ironic_states.AVAILABLE, ironic_states.NOSTATE] + return (node_obj.maintenance or + node_obj.power_state in bad_power_states or + node_obj.provision_state not in good_provision_states) + + def _node_resources_used(self, node_obj): + """Determine whether the node's resources are currently used. + + Determines whether the node's resources should be considered used + or not. A node is used when it is either in the process of putting + a new instance on the node, has an instance on the node, or is in + the process of cleaning up from a deleted instance. Returns True if + used. + + If we report resources as consumed for a node that does not have an + instance on it, the resource tracker will notice there's no instances + consuming resources and try to correct us. So only nodes with an + instance attached should report as consumed here. + """ + return node_obj.instance_uuid is not None + + def _parse_node_properties(self, node): + """Helper method to parse the node's properties.""" + properties = {} + + for prop in ('cpus', 'memory_mb', 'local_gb'): + try: + properties[prop] = int(node.properties.get(prop, 0)) + except (TypeError, ValueError): + LOG.warning('Node %(uuid)s has a malformed "%(prop)s". ' + 'It should be an integer.', + {'uuid': node.uuid, 'prop': prop}) + properties[prop] = 0 + + raw_cpu_arch = node.properties.get('cpu_arch', None) + try: + cpu_arch = obj_fields.Architecture.canonicalize(raw_cpu_arch) + except exception.InvalidArchitectureName: + cpu_arch = None + if not cpu_arch: + LOG.warning("cpu_arch not defined for node '%s'", node.uuid) + + properties['cpu_arch'] = cpu_arch + properties['raw_cpu_arch'] = raw_cpu_arch + properties['capabilities'] = node.properties.get('capabilities') + return properties + + def _parse_node_instance_info(self, node, props): + """Helper method to parse the node's instance info. + + If a property cannot be looked up via instance_info, use the original + value from the properties dict. This is most likely to be correct; + it should only be incorrect if the properties were changed directly + in Ironic while an instance was deployed. + """ + instance_info = {} + + # add this key because it's different in instance_info for some reason + props['vcpus'] = props['cpus'] + for prop in ('vcpus', 'memory_mb', 'local_gb'): + original = props[prop] + try: + instance_info[prop] = int(node.instance_info.get(prop, + original)) + except (TypeError, ValueError): + LOG.warning('Node %(uuid)s has a malformed "%(prop)s". 
' + 'It should be an integer but its value ' + 'is "%(value)s".', + {'uuid': node.uuid, 'prop': prop, + 'value': node.instance_info.get(prop)}) + instance_info[prop] = original + + return instance_info + + def _node_resource(self, node): + """Helper method to create resource dict from node stats.""" + properties = self._parse_node_properties(node) + + vcpus = properties['cpus'] + memory_mb = properties['memory_mb'] + local_gb = properties['local_gb'] + raw_cpu_arch = properties['raw_cpu_arch'] + cpu_arch = properties['cpu_arch'] + + nodes_extra_specs = {} + + # NOTE(deva): In Havana and Icehouse, the flavor was required to link + # to an arch-specific deploy kernel and ramdisk pair, and so the flavor + # also had to have extra_specs['cpu_arch'], which was matched against + # the ironic node.properties['cpu_arch']. + # With Juno, the deploy image(s) may be referenced directly by the + # node.driver_info, and a flavor no longer needs to contain any of + # these three extra specs, though the cpu_arch may still be used + # in a heterogeneous environment, if so desired. + # NOTE(dprince): we use the raw cpu_arch here because extra_specs + # filters aren't canonicalized + nodes_extra_specs['cpu_arch'] = raw_cpu_arch + + # NOTE(gilliard): To assist with more precise scheduling, if the + # node.properties contains a key 'capabilities', we expect the value + # to be of the form "k1:v1,k2:v2,etc.." which we add directly as + # key/value pairs into the node_extra_specs to be used by the + # ComputeCapabilitiesFilter + capabilities = properties['capabilities'] + if capabilities: + for capability in str(capabilities).split(','): + parts = capability.split(':') + if len(parts) == 2 and parts[0] and parts[1]: + nodes_extra_specs[parts[0].strip()] = parts[1] + else: + LOG.warning("Ignoring malformed capability '%s'. " + "Format should be 'key:val'.", capability) + + vcpus_used = 0 + memory_mb_used = 0 + local_gb_used = 0 + + if self._node_resources_used(node): + # Node is in the process of deploying, is deployed, or is in + # the process of cleaning up from a deploy. Report all of its + # resources as in use. + vcpus_used = vcpus + memory_mb_used = memory_mb + local_gb_used = local_gb + # Always checking allows us to catch the case where Nova thinks there + # are available resources on the Node, but Ironic does not (because it + # is not in a usable state): https://launchpad.net/bugs/1503453 + elif self._node_resources_unavailable(node): + # The node's current state is such that it should not present any + # of its resources to Nova + vcpus = 0 + memory_mb = 0 + local_gb = 0 + + dic = { + 'hypervisor_hostname': str(node.uuid), + 'hypervisor_type': self._get_hypervisor_type(), + 'hypervisor_version': self._get_hypervisor_version(), + 'resource_class': node.resource_class, + # The Ironic driver manages multiple hosts, so there are + # likely many different CPU models in use. 
As such it is + # impossible to provide any meaningful info on the CPU + # model of the "host" + 'cpu_info': None, + 'vcpus': vcpus, + 'vcpus_used': vcpus_used, + 'local_gb': local_gb, + 'local_gb_used': local_gb_used, + 'disk_available_least': local_gb - local_gb_used, + 'memory_mb': memory_mb, + 'memory_mb_used': memory_mb_used, + 'supported_instances': _get_nodes_supported_instances(cpu_arch), + 'stats': nodes_extra_specs, + 'numa_topology': None, + } + return dic + + def _start_firewall(self, instance, network_info): + self.firewall_driver.setup_basic_filtering(instance, network_info) + self.firewall_driver.prepare_instance_filter(instance, network_info) + self.firewall_driver.apply_instance_filter(instance, network_info) + + def _stop_firewall(self, instance, network_info): + self.firewall_driver.unfilter_instance(instance, network_info) + + def _add_instance_info_to_node(self, node, instance, image_meta, flavor, + preserve_ephemeral=None, + block_device_info=None): + + root_bdm = block_device.get_root_bdm( + virt_driver.block_device_info_get_mapping(block_device_info)) + boot_from_volume = root_bdm is not None + patch = patcher.create(node).get_deploy_patch(instance, + image_meta, + flavor, + preserve_ephemeral, + boot_from_volume) + + # Associate the node with an instance + patch.append({'path': '/instance_uuid', 'op': 'add', + 'value': instance.uuid}) + try: + # FIXME(lucasagomes): The "retry_on_conflict" parameter was added + # to basically causes the deployment to fail faster in case the + # node picked by the scheduler is already associated with another + # instance due bug #1341420. + self.ironicclient.call('node.update', node.uuid, patch, + retry_on_conflict=False) + except ironic.exc.BadRequest: + msg = (_("Failed to add deploy parameters on node %(node)s " + "when provisioning the instance %(instance)s") + % {'node': node.uuid, 'instance': instance.uuid}) + LOG.error(msg) + raise exception.InstanceDeployFailure(msg) + + def _remove_instance_info_from_node(self, node, instance): + patch = [{'path': '/instance_info', 'op': 'remove'}, + {'path': '/instance_uuid', 'op': 'remove'}] + try: + self.ironicclient.call('node.update', node.uuid, patch) + except ironic.exc.BadRequest as e: + LOG.warning("Failed to remove deploy parameters from node " + "%(node)s when unprovisioning the instance " + "%(instance)s: %(reason)s", + {'node': node.uuid, 'instance': instance.uuid, + 'reason': six.text_type(e)}) + + def _add_volume_target_info(self, context, instance, block_device_info): + bdms = virt_driver.block_device_info_get_mapping(block_device_info) + + for bdm in bdms: + # TODO(TheJulia): In Queens, we should refactor the check below + # to something more elegent, as is_volume is not proxied through + # to the DriverVolumeBlockDevice object. Until then, we are + # checking the underlying object's status. 
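+            # NOTE(editor): illustrative shape only, values are not from
+            # this change: connection_info is the JSON blob stored on the
+            # BDM, e.g. {"driver_volume_type": "iscsi", "data": {...}};
+            # the "data" dict becomes the volume target's properties below.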
+ if not bdm._bdm_obj.is_volume: + continue + + connection_info = jsonutils.loads(bdm._bdm_obj.connection_info) + target_properties = connection_info['data'] + driver_volume_type = connection_info['driver_volume_type'] + + try: + self.ironicclient.call('volume_target.create', + node_uuid=instance.node, + volume_type=driver_volume_type, + properties=target_properties, + boot_index=bdm._bdm_obj.boot_index, + volume_id=bdm._bdm_obj.volume_id) + except (ironic.exc.BadRequest, ironic.exc.Conflict): + msg = (_("Failed to add volume target information of " + "volume %(volume)s on node %(node)s when " + "provisioning the instance") + % {'volume': bdm._bdm_obj.volume_id, + 'node': instance.node}) + LOG.error(msg, instance=instance) + raise exception.InstanceDeployFailure(msg) + + def _cleanup_volume_target_info(self, instance): + targets = self.ironicclient.call('node.list_volume_targets', + instance.node, detail=True) + for target in targets: + volume_target_id = target.uuid + try: + self.ironicclient.call('volume_target.delete', + volume_target_id) + except ironic.exc.NotFound: + LOG.debug("Volume target information %(target)s of volume " + "%(volume)s is already removed from node %(node)s", + {'target': volume_target_id, + 'volume': target.volume_id, + 'node': instance.node}, + instance=instance) + except ironic.exc.ClientException as e: + LOG.warning("Failed to remove volume target information " + "%(target)s of volume %(volume)s from node " + "%(node)s when unprovisioning the instance: " + "%(reason)s", + {'target': volume_target_id, + 'volume': target.volume_id, + 'node': instance.node, + 'reason': e}, + instance=instance) + + def _cleanup_deploy(self, node, instance, network_info): + self._cleanup_volume_target_info(instance) + self._unplug_vifs(node, instance, network_info) + self._stop_firewall(instance, network_info) + + def _wait_for_active(self, instance): + """Wait for the node to be marked as ACTIVE in Ironic.""" + instance.refresh() + if (instance.task_state == task_states.DELETING or + instance.vm_state in (vm_states.ERROR, vm_states.DELETED)): + raise exception.InstanceDeployFailure( + _("Instance %s provisioning was aborted") % instance.uuid) + + node = self._validate_instance_and_node(instance) + if node.provision_state == ironic_states.ACTIVE: + # job is done + LOG.debug("Ironic node %(node)s is now ACTIVE", + dict(node=node.uuid), instance=instance) + raise loopingcall.LoopingCallDone() + + if node.target_provision_state in (ironic_states.DELETED, + ironic_states.AVAILABLE): + # ironic is trying to delete it now + raise exception.InstanceNotFound(instance_id=instance.uuid) + + if node.provision_state in (ironic_states.NOSTATE, + ironic_states.AVAILABLE): + # ironic already deleted it + raise exception.InstanceNotFound(instance_id=instance.uuid) + + if node.provision_state == ironic_states.DEPLOYFAIL: + # ironic failed to deploy + msg = (_("Failed to provision instance %(inst)s: %(reason)s") + % {'inst': instance.uuid, 'reason': node.last_error}) + raise exception.InstanceDeployFailure(msg) + + _log_ironic_polling('become ACTIVE', node, instance) + + def _wait_for_power_state(self, instance, message): + """Wait for the node to complete a power state change.""" + node = self._validate_instance_and_node(instance) + + if node.target_power_state == ironic_states.NOSTATE: + raise loopingcall.LoopingCallDone() + + _log_ironic_polling(message, node, instance) + + def init_host(self, host): + """Initialize anything that is necessary for the driver to function. 
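+
+        For this driver that amounts to building the initial consistent
+        hash ring of ironic-backed compute services (see
+        _refresh_hash_ring).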
+ + :param host: the hostname of the compute host. + + """ + self._refresh_hash_ring(nova_context.get_admin_context()) + + @staticmethod + def _pike_flavor_migration_for_node(ctx, node_rc, instance_uuid): + normalized_rc = obj_fields.ResourceClass.normalize_name(node_rc) + instance = objects.Instance.get_by_uuid(ctx, instance_uuid, + expected_attrs=["flavor"]) + specs = instance.flavor.extra_specs + resource_key = "resources:%s" % normalized_rc + if resource_key in specs: + # The compute must have been restarted, and the instance.flavor + # has already been migrated + return False + specs[resource_key] = "1" + instance.save() + return True + + def _pike_flavor_migration(self, node_uuids): + """This code is needed in Pike to prevent problems where an operator + has already adjusted their flavors to add the custom resource class to + extra_specs. Since existing ironic instances will not have this in + their extra_specs, they will only have allocations against + VCPU/RAM/disk. By adding just the custom RC to the existing flavor + extra_specs, the periodic call to update_available_resources() will add + an allocation against the custom resource class, and prevent placement + from thinking that that node is available. This code can be removed in + Queens, and will need to be updated to also alter extra_specs to + zero-out the old-style standard resource classes of VCPU, MEMORY_MB, + and DISK_GB. + """ + ctx = nova_context.get_admin_context() + + for node_uuid in node_uuids: + node = self._node_from_cache(node_uuid) + if not node: + continue + node_rc = node.resource_class + if not node_rc: + LOG.warning("Node %(node)s does not have its resource_class " + "set.", {"node": node.uuid}) + continue + if node.instance_uuid in self._migrated_instance_uuids: + continue + self._pike_flavor_migration_for_node(ctx, node_rc, + node.instance_uuid) + self._migrated_instance_uuids.add(node.instance_uuid) + LOG.debug("The flavor extra_specs for Ironic instance %(inst)s " + "have been updated for custom resource class '%(rc)s'.", + {"inst": node.instance_uuid, "rc": node_rc}) + return + + def _get_hypervisor_type(self): + """Get hypervisor type.""" + return 'ironic' + + def _get_hypervisor_version(self): + """Returns the version of the Ironic API service endpoint.""" + return client_wrapper.IRONIC_API_VERSION[0] + + def instance_exists(self, instance): + """Checks the existence of an instance. + + Checks the existence of an instance. This is an override of the + base method for efficiency. + + :param instance: The instance object. + :returns: True if the instance exists. False if not. + + """ + try: + self._validate_instance_and_node(instance) + return True + except exception.InstanceNotFound: + return False + + def _get_node_list(self, **kwargs): + """Helper function to return the list of nodes. + + If unable to connect ironic server, an empty list is returned. + + :returns: a list of raw node from ironic + + """ + node_list = [] + try: + node_list = self.ironicclient.call("node.list", **kwargs) + except exception.NovaException as e: + LOG.error("Failed to get the list of nodes from the Ironic " + "inventory. Error: %s", e) + except Exception as e: + LOG.error("An unknown error has occurred when trying to get the " + "list of nodes from the Ironic inventory. Error: %s", e) + return node_list + + def list_instances(self): + """Return the names of all the instances provisioned. + + :returns: a list of instance names. 
+ + """ + # NOTE(lucasagomes): limit == 0 is an indicator to continue + # pagination until there're no more values to be returned. + node_list = self._get_node_list(associated=True, limit=0) + context = nova_context.get_admin_context() + return [objects.Instance.get_by_uuid(context, + i.instance_uuid).name + for i in node_list] + + def list_instance_uuids(self): + """Return the UUIDs of all the instances provisioned. + + :returns: a list of instance UUIDs. + + """ + # NOTE(lucasagomes): limit == 0 is an indicator to continue + # pagination until there're no more values to be returned. + return list(n.instance_uuid + for n in self._get_node_list(associated=True, limit=0)) + + def node_is_available(self, nodename): + """Confirms a Nova hypervisor node exists in the Ironic inventory. + + :param nodename: The UUID of the node. Parameter is called nodename + even though it is a UUID to keep method signature + the same as inherited class. + :returns: True if the node exists, False if not. + + """ + # NOTE(comstud): We can cheat and use caching here. This method + # just needs to return True for nodes that exist. It doesn't + # matter if the data is stale. Sure, it's possible that removing + # node from Ironic will cause this method to return True until + # the next call to 'get_available_nodes', but there shouldn't + # be much harm. There's already somewhat of a race. + if not self.node_cache: + # Empty cache, try to populate it. + self._refresh_cache() + + # nodename is the ironic node's UUID. + if nodename in self.node_cache: + return True + + # NOTE(comstud): Fallback and check Ironic. This case should be + # rare. + try: + # nodename is the ironic node's UUID. + self._get_node(nodename) + return True + except ironic.exc.NotFound: + return False + + def _refresh_hash_ring(self, ctxt): + service_list = objects.ServiceList.get_all_computes_by_hv_type( + ctxt, self._get_hypervisor_type()) + services = set() + for svc in service_list: + is_up = self.servicegroup_api.service_is_up(svc) + if is_up: + services.add(svc.host) + # NOTE(jroll): always make sure this service is in the list, because + # only services that have something registered in the compute_nodes + # table will be here so far, and we might be brand new. + services.add(CONF.host) + + self.hash_ring = hash_ring.HashRing(services, + partitions=_HASH_RING_PARTITIONS) + + def _refresh_cache(self): + # NOTE(lucasagomes): limit == 0 is an indicator to continue + # pagination until there're no more values to be returned. + ctxt = nova_context.get_admin_context() + self._refresh_hash_ring(ctxt) + instances = objects.InstanceList.get_uuids_by_host(ctxt, CONF.host) + node_cache = {} + + for node in self._get_node_list(detail=True, limit=0): + # NOTE(jroll): we always manage the nodes for instances we manage + if node.instance_uuid in instances: + node_cache[node.uuid] = node + + # NOTE(jroll): check if the node matches us in the hash ring, and + # does not have an instance_uuid (which would imply the node has + # an instance managed by another compute service). + # Note that this means nodes with an instance that was deleted in + # nova while the service was down, and not yet reaped, will not be + # reported until the periodic task cleans it up. 
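+            # NOTE(editor): descriptive note, not part of the original
+            # change: hash_ring.get_nodes() returns the set of compute
+            # hosts that own this node's hash ring partition, so the test
+            # below asks "is this service one of the node's assignees".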
+ elif (node.instance_uuid is None and + CONF.host in + self.hash_ring.get_nodes(node.uuid.encode('utf-8'))): + node_cache[node.uuid] = node + + self.node_cache = node_cache + self.node_cache_time = time.time() + # For Pike, we need to ensure that all instances have their flavor + # migrated to include the resource_class. Since there could be many, + # many instances controlled by this host, spawn this asynchronously so + # as not to block this service. + node_uuids = [node.uuid for node in self.node_cache.values() + if node.instance_uuid and + node.instance_uuid not in self._migrated_instance_uuids] + if node_uuids: + # No need to run unless something has changed + utils.spawn_n(self._pike_flavor_migration, node_uuids) + + def get_available_nodes(self, refresh=False): + """Returns the UUIDs of Ironic nodes managed by this compute service. + + We use consistent hashing to distribute Ironic nodes between all + available compute services. The subset of nodes managed by a given + compute service is determined by the following rules: + + * any node with an instance managed by the compute service + * any node that is mapped to the compute service on the hash ring + * no nodes with instances managed by another compute service + + The ring is rebalanced as nova-compute services are brought up and + down. Note that this rebalance does not happen at the same time for + all compute services, so a node may be managed by multiple compute + services for a small amount of time. + + :param refresh: Boolean value; If True run update first. Ignored by + this driver. + :returns: a list of UUIDs + + """ + # NOTE(jroll) we refresh the cache every time this is called + # because it needs to happen in the resource tracker + # periodic task. This task doesn't pass refresh=True, + # unfortunately. + self._refresh_cache() + + node_uuids = list(self.node_cache.keys()) + LOG.debug("Returning %(num_nodes)s available node(s)", + dict(num_nodes=len(node_uuids))) + + return node_uuids + + def get_inventory(self, nodename): + """Return a dict, keyed by resource class, of inventory information for + the supplied node. + """ + # nodename is the ironic node's UUID. + node = self._node_from_cache(nodename) + info = self._node_resource(node) + # TODO(jaypipes): Completely remove the reporting of VCPU, MEMORY_MB, + # and DISK_GB resource classes in early Queens when Ironic nodes will + # *always* return the custom resource class that represents the + # baremetal node class in an atomic, singular unit. + if info['vcpus'] == 0: + # NOTE(jaypipes): The driver can return 0-valued vcpus when the + # node is "disabled". In the future, we should detach inventory + # accounting from the concept of a node being disabled or not. The + # two things don't really have anything to do with each other. + LOG.debug('Node %(node)s is not ready for a deployment, ' + 'reporting an empty inventory for it. 
Node\'s ' + 'provision state is %(prov)s, power state is ' + '%(power)s and maintenance is %(maint)s.', + {'node': node.uuid, 'prov': node.provision_state, + 'power': node.power_state, 'maint': node.maintenance}) + return {} + + result = { + obj_fields.ResourceClass.VCPU: { + 'total': info['vcpus'], + 'reserved': 0, + 'min_unit': 1, + 'max_unit': info['vcpus'], + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + obj_fields.ResourceClass.MEMORY_MB: { + 'total': info['memory_mb'], + 'reserved': 0, + 'min_unit': 1, + 'max_unit': info['memory_mb'], + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + obj_fields.ResourceClass.DISK_GB: { + 'total': info['local_gb'], + 'reserved': 0, + 'min_unit': 1, + 'max_unit': info['local_gb'], + 'step_size': 1, + 'allocation_ratio': 1.0, + }, + } + rc_name = info.get('resource_class') + if rc_name is not None: + # TODO(jaypipes): Raise an exception in Queens if Ironic doesn't + # report a resource class for the node + norm_name = obj_fields.ResourceClass.normalize_name(rc_name) + if norm_name is not None: + result[norm_name] = { + 'total': 1, + 'reserved': 0, + 'min_unit': 1, + 'max_unit': 1, + 'step_size': 1, + 'allocation_ratio': 1.0, + } + + return result + + def get_available_resource(self, nodename): + """Retrieve resource information. + + This method is called when nova-compute launches, and + as part of a periodic task that records the results in the DB. + + :param nodename: the UUID of the node. + :returns: a dictionary describing resources. + + """ + # NOTE(comstud): We can cheat and use caching here. This method is + # only called from a periodic task and right after the above + # get_available_nodes() call is called. + if not self.node_cache: + # Well, it's also called from init_host(), so if we have empty + # cache, let's try to populate it. + self._refresh_cache() + + # nodename is the ironic node's UUID. + node = self._node_from_cache(nodename) + return self._node_resource(node) + + def _node_from_cache(self, node_uuid): + """Returns a node from the cache, retrieving the node from Ironic API + if the node doesn't yet exist in the cache. + """ + cache_age = time.time() - self.node_cache_time + if node_uuid in self.node_cache: + LOG.debug("Using cache for node %(node)s, age: %(age)s", + {'node': node_uuid, 'age': cache_age}) + return self.node_cache[node_uuid] + else: + LOG.debug("Node %(node)s not found in cache, age: %(age)s", + {'node': node_uuid, 'age': cache_age}) + node = self._get_node(node_uuid) + self.node_cache[node_uuid] = node + return node + + def get_info(self, instance): + """Get the current state and resource usage for this instance. + + If the instance is not found this method returns (a dictionary + with) NOSTATE and all resources == 0. + + :param instance: the instance object. 
+ :returns: a InstanceInfo object + """ + try: + node = self._validate_instance_and_node(instance) + except exception.InstanceNotFound: + return hardware.InstanceInfo( + state=map_power_state(ironic_states.NOSTATE)) + + properties = self._parse_node_properties(node) + memory_kib = properties['memory_mb'] * 1024 + if memory_kib == 0: + LOG.warning("Warning, memory usage is 0 for " + "%(instance)s on baremetal node %(node)s.", + {'instance': instance.uuid, + 'node': instance.node}) + + num_cpu = properties['cpus'] + if num_cpu == 0: + LOG.warning("Warning, number of cpus is 0 for " + "%(instance)s on baremetal node %(node)s.", + {'instance': instance.uuid, + 'node': instance.node}) + + return hardware.InstanceInfo(state=map_power_state(node.power_state)) + + def deallocate_networks_on_reschedule(self, instance): + """Does the driver want networks deallocated on reschedule? + + :param instance: the instance object. + :returns: Boolean value. If True deallocate networks on reschedule. + """ + return True + + def _get_network_metadata(self, node, network_info): + """Gets a more complete representation of the instance network info. + + This data is exposed as network_data.json in the metadata service and + the config drive. + + :param node: The node object. + :param network_info: Instance network information. + """ + base_metadata = netutils.get_network_metadata(network_info) + + # TODO(vdrok): change to doing a single "detailed vif list" call, + # when added to ironic API, response to that will contain all + # necessary information. Then we will be able to avoid looking at + # internal_info/extra fields. + ports = self.ironicclient.call("node.list_ports", + node.uuid, detail=True) + portgroups = self.ironicclient.call("portgroup.list", node=node.uuid, + detail=True) + vif_id_to_objects = {'ports': {}, 'portgroups': {}} + for collection, name in ((ports, 'ports'), (portgroups, 'portgroups')): + for p in collection: + vif_id = (p.internal_info.get('tenant_vif_port_id') or + p.extra.get('vif_port_id')) + if vif_id: + vif_id_to_objects[name][vif_id] = p + + additional_links = [] + for link in base_metadata['links']: + vif_id = link['vif_id'] + if vif_id in vif_id_to_objects['portgroups']: + pg = vif_id_to_objects['portgroups'][vif_id] + pg_ports = [p for p in ports if p.portgroup_uuid == pg.uuid] + link.update({'type': 'bond', 'bond_mode': pg.mode, + 'bond_links': []}) + # If address is set on the portgroup, an (ironic) vif-attach + # call has already updated neutron with the port address; + # reflect it here. Otherwise, an address generated by neutron + # will be used instead (code is elsewhere to handle this case). + if pg.address: + link.update({'ethernet_mac_address': pg.address}) + for prop in pg.properties: + # These properties are the bonding driver options described + # at https://www.kernel.org/doc/Documentation/networking/bonding.txt # noqa + # cloud-init checks the same way, parameter name has to + # start with bond + key = prop if prop.startswith('bond') else 'bond_%s' % prop + link[key] = pg.properties[prop] + for port in pg_ports: + # This won't cause any duplicates to be added. A port + # cannot be in more than one port group for the same + # node. 
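+                    # NOTE(editor): illustrative sketch of an entry added
+                    # here (the values are invented):
+                    #   {'id': '<port uuid>', 'type': 'phy',
+                    #    'ethernet_mac_address': '52:54:00:aa:bb:cc'}
+                    # and the same port uuid is appended to the bond's
+                    # 'bond_links' list just below.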
+ additional_links.append({ + 'id': port.uuid, + 'type': 'phy', 'ethernet_mac_address': port.address, + }) + link['bond_links'].append(port.uuid) + elif vif_id in vif_id_to_objects['ports']: + p = vif_id_to_objects['ports'][vif_id] + # Ironic updates neutron port's address during attachment + link.update({'ethernet_mac_address': p.address, + 'type': 'phy'}) + + base_metadata['links'].extend(additional_links) + return base_metadata + + def _generate_configdrive(self, context, instance, node, network_info, + extra_md=None, files=None): + """Generate a config drive. + + :param instance: The instance object. + :param node: The node object. + :param network_info: Instance network information. + :param extra_md: Optional, extra metadata to be added to the + configdrive. + :param files: Optional, a list of paths to files to be added to + the configdrive. + + """ + if not extra_md: + extra_md = {} + + i_meta = instance_metadata.InstanceMetadata(instance, + content=files, extra_md=extra_md, network_info=network_info, + network_metadata=self._get_network_metadata(node, network_info), + request_context=context) + + with tempfile.NamedTemporaryFile() as uncompressed: + with configdrive.ConfigDriveBuilder(instance_md=i_meta) as cdb: + cdb.make_drive(uncompressed.name) + + with tempfile.NamedTemporaryFile() as compressed: + # compress config drive + with gzip.GzipFile(fileobj=compressed, mode='wb') as gzipped: + uncompressed.seek(0) + shutil.copyfileobj(uncompressed, gzipped) + + # base64 encode config drive + compressed.seek(0) + return base64.b64encode(compressed.read()) + + def spawn(self, context, instance, image_meta, injected_files, + admin_password, network_info=None, block_device_info=None): + """Deploy an instance. + + :param context: The security context. + :param instance: The instance object. + :param image_meta: Image dict returned by nova.image.glance + that defines the image from which to boot this instance. + :param injected_files: User files to inject into instance. + :param admin_password: Administrator password to set in + instance. + :param network_info: Instance network information. + :param block_device_info: Instance block device + information. + """ + LOG.debug('Spawn called for instance', instance=instance) + + # The compute manager is meant to know the node uuid, so missing uuid + # is a significant issue. It may mean we've been passed the wrong data. + node_uuid = instance.get('node') + if not node_uuid: + raise ironic.exc.BadRequest( + _("Ironic node uuid not supplied to " + "driver for instance %s.") % instance.uuid) + + node = self._get_node(node_uuid) + flavor = instance.flavor + + self._add_instance_info_to_node(node, instance, image_meta, flavor, + block_device_info=block_device_info) + + try: + self._add_volume_target_info(context, instance, block_device_info) + except Exception: + with excutils.save_and_reraise_exception(): + LOG.error("Error preparing deploy for instance " + "on baremetal node %(node)s.", + {'node': node_uuid}, + instance=instance) + self._cleanup_deploy(node, instance, network_info) + + # NOTE(Shrews): The default ephemeral device needs to be set for + # services (like cloud-init) that depend on it being returned by the + # metadata server. Addresses bug https://launchpad.net/bugs/1324286. 
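+        # NOTE(editor): descriptive note, not part of the original change:
+        # the save() below persists the device name before the deploy is
+        # triggered, so the metadata service can hand it back to the guest.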
+ if flavor.ephemeral_gb: + instance.default_ephemeral_device = '/dev/sda1' + instance.save() + + # validate we are ready to do the deploy + validate_chk = self.ironicclient.call("node.validate", node_uuid) + if (not validate_chk.deploy.get('result') + or not validate_chk.power.get('result') + or not validate_chk.storage.get('result')): + # something is wrong. undo what we have done + self._cleanup_deploy(node, instance, network_info) + raise exception.ValidationError(_( + "Ironic node: %(id)s failed to validate." + " (deploy: %(deploy)s, power: %(power)s," + " storage: %(storage)s)") + % {'id': node.uuid, + 'deploy': validate_chk.deploy, + 'power': validate_chk.power, + 'storage': validate_chk.storage}) + + # prepare for the deploy + try: + self._plug_vifs(node, instance, network_info) + self._start_firewall(instance, network_info) + except Exception: + with excutils.save_and_reraise_exception(): + LOG.error("Error preparing deploy for instance " + "%(instance)s on baremetal node %(node)s.", + {'instance': instance.uuid, + 'node': node_uuid}) + self._cleanup_deploy(node, instance, network_info) + + # Config drive + configdrive_value = None + if configdrive.required_by(instance): + extra_md = {} + if admin_password: + extra_md['admin_pass'] = admin_password + + try: + configdrive_value = self._generate_configdrive( + context, instance, node, network_info, extra_md=extra_md, + files=injected_files) + except Exception as e: + with excutils.save_and_reraise_exception(): + msg = ("Failed to build configdrive: %s" % + six.text_type(e)) + LOG.error(msg, instance=instance) + self._cleanup_deploy(node, instance, network_info) + + LOG.info("Config drive for instance %(instance)s on " + "baremetal node %(node)s created.", + {'instance': instance['uuid'], 'node': node_uuid}) + + # trigger the node deploy + try: + self.ironicclient.call("node.set_provision_state", node_uuid, + ironic_states.ACTIVE, + configdrive=configdrive_value) + except Exception as e: + with excutils.save_and_reraise_exception(): + LOG.error("Failed to request Ironic to provision instance " + "%(inst)s: %(reason)s", + {'inst': instance.uuid, + 'reason': six.text_type(e)}) + self._cleanup_deploy(node, instance, network_info) + + timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active, + instance) + try: + timer.start(interval=CONF.ironic.api_retry_interval).wait() + LOG.info('Successfully provisioned Ironic node %s', + node.uuid, instance=instance) + except Exception: + with excutils.save_and_reraise_exception(): + LOG.error("Error deploying instance %(instance)s on " + "baremetal node %(node)s.", + {'instance': instance.uuid, + 'node': node_uuid}) + + def _unprovision(self, instance, node): + """This method is called from destroy() to unprovision + already provisioned node after required checks. + """ + try: + self.ironicclient.call("node.set_provision_state", node.uuid, + "deleted") + except Exception as e: + # if the node is already in a deprovisioned state, continue + # This should be fixed in Ironic. + # TODO(deva): This exception should be added to + # python-ironicclient and matched directly, + # rather than via __name__. 
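+            # NOTE(editor): i.e. only an exception whose type is named
+            # 'InstanceDeployFailure' (the "already deprovisioned" case
+            # mentioned above) is swallowed; anything else is re-raised.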
+ if getattr(e, '__name__', None) != 'InstanceDeployFailure': + raise + + # using a dict because this is modified in the local method + data = {'tries': 0} + + def _wait_for_provision_state(): + try: + node = self._validate_instance_and_node(instance) + except exception.InstanceNotFound: + LOG.debug("Instance already removed from Ironic", + instance=instance) + raise loopingcall.LoopingCallDone() + if node.provision_state in (ironic_states.NOSTATE, + ironic_states.CLEANING, + ironic_states.CLEANWAIT, + ironic_states.CLEANFAIL, + ironic_states.AVAILABLE): + # From a user standpoint, the node is unprovisioned. If a node + # gets into CLEANFAIL state, it must be fixed in Ironic, but we + # can consider the instance unprovisioned. + LOG.debug("Ironic node %(node)s is in state %(state)s, " + "instance is now unprovisioned.", + dict(node=node.uuid, state=node.provision_state), + instance=instance) + raise loopingcall.LoopingCallDone() + + if data['tries'] >= CONF.ironic.api_max_retries + 1: + msg = (_("Error destroying the instance on node %(node)s. " + "Provision state still '%(state)s'.") + % {'state': node.provision_state, + 'node': node.uuid}) + LOG.error(msg) + raise exception.NovaException(msg) + else: + data['tries'] += 1 + + _log_ironic_polling('unprovision', node, instance) + + # wait for the state transition to finish + timer = loopingcall.FixedIntervalLoopingCall(_wait_for_provision_state) + timer.start(interval=CONF.ironic.api_retry_interval).wait() + + def destroy(self, context, instance, network_info, + block_device_info=None, destroy_disks=True): + """Destroy the specified instance, if it can be found. + + :param context: The security context. + :param instance: The instance object. + :param network_info: Instance network information. + :param block_device_info: Instance block device + information. Ignored by this driver. + :param destroy_disks: Indicates if disks should be + destroyed. Ignored by this driver. + """ + LOG.debug('Destroy called for instance', instance=instance) + try: + node = self._validate_instance_and_node(instance) + except exception.InstanceNotFound: + LOG.warning("Destroy called on non-existing instance %s.", + instance.uuid) + # NOTE(deva): if nova.compute.ComputeManager._delete_instance() + # is called on a non-existing instance, the only way + # to delete it is to return from this method + # without raising any exceptions. + return + + if node.provision_state in _UNPROVISION_STATES: + self._unprovision(instance, node) + else: + # NOTE(hshiina): if spawn() fails before ironic starts + # provisioning, instance information should be + # removed from ironic node. + self._remove_instance_info_from_node(node, instance) + + self._cleanup_deploy(node, instance, network_info) + LOG.info('Successfully unprovisioned Ironic node %s', + node.uuid, instance=instance) + + def reboot(self, context, instance, network_info, reboot_type, + block_device_info=None, bad_volumes_callback=None): + """Reboot the specified instance. + + NOTE: Unlike the libvirt driver, this method does not delete + and recreate the instance; it preserves local state. + + :param context: The security context. + :param instance: The instance object. + :param network_info: Instance network information. Ignored by + this driver. + :param reboot_type: Either a HARD or SOFT reboot. + :param block_device_info: Info pertaining to attached volumes. + Ignored by this driver. + :param bad_volumes_callback: Function to handle any bad volumes + encountered. Ignored by this driver. 
+ + """ + LOG.debug('Reboot(type %s) called for instance', + reboot_type, instance=instance) + node = self._validate_instance_and_node(instance) + + hard = True + if reboot_type == 'SOFT': + try: + self.ironicclient.call("node.set_power_state", node.uuid, + 'reboot', soft=True) + hard = False + except ironic.exc.BadRequest as exc: + LOG.info('Soft reboot is not supported by ironic hardware ' + 'driver. Falling back to hard reboot: %s', + exc, + instance=instance) + + if hard: + self.ironicclient.call("node.set_power_state", node.uuid, 'reboot') + + timer = loopingcall.FixedIntervalLoopingCall( + self._wait_for_power_state, instance, 'reboot') + timer.start(interval=CONF.ironic.api_retry_interval).wait() + LOG.info('Successfully rebooted(type %(type)s) Ironic node %(node)s', + {'type': ('HARD' if hard else 'SOFT'), + 'node': node.uuid}, + instance=instance) + + def power_off(self, instance, timeout=0, retry_interval=0): + """Power off the specified instance. + + NOTE: Unlike the libvirt driver, this method does not delete + and recreate the instance; it preserves local state. + + :param instance: The instance object. + :param timeout: time to wait for node to shutdown. If it is set, + soft power off is attempted before hard power off. + :param retry_interval: How often to signal node while waiting + for it to shutdown. Ignored by this driver. Retrying depends on + Ironic hardware driver. + """ + LOG.debug('Power off called for instance', instance=instance) + node = self._validate_instance_and_node(instance) + + if timeout: + try: + self.ironicclient.call("node.set_power_state", node.uuid, + 'off', soft=True, timeout=timeout) + + timer = loopingcall.FixedIntervalLoopingCall( + self._wait_for_power_state, instance, 'soft power off') + timer.start(interval=CONF.ironic.api_retry_interval).wait() + node = self._validate_instance_and_node(instance) + if node.power_state == ironic_states.POWER_OFF: + LOG.info('Successfully soft powered off Ironic node %s', + node.uuid, instance=instance) + return + LOG.info("Failed to soft power off instance " + "%(instance)s on baremetal node %(node)s " + "within the required timeout %(timeout)d " + "seconds due to error: %(reason)s. " + "Attempting hard power off.", + {'instance': instance.uuid, + 'timeout': timeout, + 'node': node.uuid, + 'reason': node.last_error}, + instance=instance) + except ironic.exc.ClientException as e: + LOG.info("Failed to soft power off instance " + "%(instance)s on baremetal node %(node)s " + "due to error: %(reason)s. " + "Attempting hard power off.", + {'instance': instance.uuid, + 'node': node.uuid, + 'reason': e}, + instance=instance) + + self.ironicclient.call("node.set_power_state", node.uuid, 'off') + timer = loopingcall.FixedIntervalLoopingCall( + self._wait_for_power_state, instance, 'power off') + timer.start(interval=CONF.ironic.api_retry_interval).wait() + LOG.info('Successfully hard powered off Ironic node %s', + node.uuid, instance=instance) + + def power_on(self, context, instance, network_info, + block_device_info=None): + """Power on the specified instance. + + NOTE: Unlike the libvirt driver, this method does not delete + and recreate the instance; it preserves local state. + + :param context: The security context. + :param instance: The instance object. + :param network_info: Instance network information. Ignored by + this driver. + :param block_device_info: Instance block device + information. Ignored by this driver. 
+
+        """
+        LOG.debug('Power on called for instance', instance=instance)
+        node = self._validate_instance_and_node(instance)
+        self.ironicclient.call("node.set_power_state", node.uuid, 'on')
+
+        timer = loopingcall.FixedIntervalLoopingCall(
+            self._wait_for_power_state, instance, 'power on')
+        timer.start(interval=CONF.ironic.api_retry_interval).wait()
+        LOG.info('Successfully powered on Ironic node %s',
+                 node.uuid, instance=instance)
+
+    def trigger_crash_dump(self, instance):
+        """Trigger crash dump mechanism on the given instance.
+
+        A stalled instance can be triggered to dump its crash data. How the
+        guest OS reacts in detail depends on its configuration.
+
+        :param instance: The instance where the crash dump should be
+                         triggered.
+
+        :return: None
+        """
+        LOG.debug('Trigger crash dump called for instance', instance=instance)
+        node = self._validate_instance_and_node(instance)
+
+        self.ironicclient.call("node.inject_nmi", node.uuid)
+
+        LOG.info('Successfully triggered crash dump on Ironic node %s',
+                 node.uuid, instance=instance)
+
+    def refresh_security_group_rules(self, security_group_id):
+        """Refresh security group rules from data store.
+
+        Invoked when security group rules are updated.
+
+        :param security_group_id: The security group id.
+
+        """
+        self.firewall_driver.refresh_security_group_rules(security_group_id)
+
+    def refresh_instance_security_rules(self, instance):
+        """Refresh security group rules from data store.
+
+        Gets called when an instance is added to or removed from the
+        security group it is a member of, or when that group gains or
+        loses a rule.
+
+        :param instance: The instance object.
+
+        """
+        self.firewall_driver.refresh_instance_security_rules(instance)
+
+    def ensure_filtering_rules_for_instance(self, instance, network_info):
+        """Set up filtering rules.
+
+        :param instance: The instance object.
+        :param network_info: Instance network information.
+
+        """
+        self.firewall_driver.setup_basic_filtering(instance, network_info)
+        self.firewall_driver.prepare_instance_filter(instance, network_info)
+
+    def unfilter_instance(self, instance, network_info):
+        """Stop filtering the instance.
+
+        :param instance: The instance object.
+        :param network_info: Instance network information.
+
+        """
+        self.firewall_driver.unfilter_instance(instance, network_info)
+
+    def _plug_vifs(self, node, instance, network_info):
+        # NOTE(PhilDay): Accessing network_info will block if the thread
+        # it wraps hasn't finished, so do this ahead of time so that we
+        # don't block while holding the logging lock.
+        network_info_str = str(network_info)
+        LOG.debug("plug: instance_uuid=%(uuid)s vif=%(network_info)s",
+                  {'uuid': instance.uuid,
+                   'network_info': network_info_str})
+        for vif in network_info:
+            port_id = six.text_type(vif['id'])
+            try:
+                self.ironicclient.call("node.vif_attach", node.uuid, port_id,
+                                       retry_on_conflict=False)
+            except ironic.exc.BadRequest as e:
+                msg = (_("Cannot attach VIF %(vif)s to the node %(node)s due "
+                         "to error: %(err)s") % {'vif': port_id,
+                                                 'node': node.uuid, 'err': e})
+                LOG.error(msg)
+                raise exception.VirtualInterfacePlugException(msg)
+            except ironic.exc.Conflict:
+                # NOTE(vsaienko): Pass, since the VIF is already attached.
+                pass
+
+    def _unplug_vifs(self, node, instance, network_info):
+        # NOTE(PhilDay): Accessing network_info will block if the thread
+        # it wraps hasn't finished, so do this ahead of time so that we
+        # don't block while holding the logging lock.
+ network_info_str = str(network_info) + LOG.debug("unplug: instance_uuid=%(uuid)s vif=%(network_info)s", + {'uuid': instance.uuid, + 'network_info': network_info_str}) + if not network_info: + return + for vif in network_info: + port_id = six.text_type(vif['id']) + try: + self.ironicclient.call("node.vif_detach", node.uuid, + port_id) + except ironic.exc.BadRequest: + LOG.debug("VIF %(vif)s isn't attached to Ironic node %(node)s", + {'vif': port_id, 'node': node.uuid}) + + def plug_vifs(self, instance, network_info): + """Plug VIFs into networks. + + :param instance: The instance object. + :param network_info: Instance network information. + + """ + # instance.node is the ironic node's UUID. + node = self._get_node(instance.node) + self._plug_vifs(node, instance, network_info) + + def unplug_vifs(self, instance, network_info): + """Unplug VIFs from networks. + + :param instance: The instance object. + :param network_info: Instance network information. + + """ + # instance.node is the ironic node's UUID. + node = self._get_node(instance.node) + self._unplug_vifs(node, instance, network_info) + + def attach_interface(self, context, instance, image_meta, vif): + """Use hotplug to add a network interface to a running instance. + The counter action to this is :func:`detach_interface`. + + :param context: The request context. + :param nova.objects.instance.Instance instance: + The instance which will get an additional network interface. + :param nova.objects.ImageMeta image_meta: + The metadata of the image of the instance. + :param nova.network.model.VIF vif: + The object which has the information about the interface to attach. + :raise nova.exception.NovaException: If the attach fails. + :returns: None + """ + # NOTE(vdrok): instance info cache gets updated by the network-changed + # event from neutron or by _heal_instance_info_cache periodic task. In + # both cases, this is done asynchronously, so the cache may not be up + # to date immediately after attachment. + self.plug_vifs(instance, [vif]) + + def detach_interface(self, context, instance, vif): + """Use hotunplug to remove a network interface from a running instance. + The counter action to this is :func:`attach_interface`. + + :param context: The request context. + :param nova.objects.instance.Instance instance: + The instance which gets a network interface removed. + :param nova.network.model.VIF vif: + The object which has the information about the interface to detach. + :raise nova.exception.NovaException: If the detach fails. + :returns: None + """ + # NOTE(vdrok): instance info cache gets updated by the network-changed + # event from neutron or by _heal_instance_info_cache periodic task. In + # both cases, this is done asynchronously, so the cache may not be up + # to date immediately after detachment. + self.unplug_vifs(instance, [vif]) + + def rebuild(self, context, instance, image_meta, injected_files, + admin_password, bdms, detach_block_devices, + attach_block_devices, network_info=None, + recreate=False, block_device_info=None, + preserve_ephemeral=False): + """Rebuild/redeploy an instance. + + This version of rebuild() allows for supporting the option to + preserve the ephemeral partition. We cannot call spawn() from + here because it will attempt to set the instance_uuid value + again, which is not allowed by the Ironic API. It also requires + the instance to not have an 'active' provision state, but we + cannot safely change that. Given that, we implement only the + portions of spawn() we need within rebuild(). 
+ + :param context: The security context. + :param instance: The instance object. + :param image_meta: Image object returned by nova.image.glance + that defines the image from which to boot this instance. Ignored + by this driver. + :param injected_files: User files to inject into instance. Ignored + by this driver. + :param admin_password: Administrator password to set in + instance. Ignored by this driver. + :param bdms: block-device-mappings to use for rebuild. Ignored + by this driver. + :param detach_block_devices: function to detach block devices. See + nova.compute.manager.ComputeManager:_rebuild_default_impl for + usage. Ignored by this driver. + :param attach_block_devices: function to attach block devices. See + nova.compute.manager.ComputeManager:_rebuild_default_impl for + usage. Ignored by this driver. + :param network_info: Instance network information. Ignored by + this driver. + :param recreate: Boolean value; if True the instance is + recreated on a new hypervisor - all the cleanup of old state is + skipped. Ignored by this driver. + :param block_device_info: Instance block device + information. Ignored by this driver. + :param preserve_ephemeral: Boolean value; if True the ephemeral + must be preserved on rebuild. + + """ + LOG.debug('Rebuild called for instance', instance=instance) + + instance.task_state = task_states.REBUILD_SPAWNING + instance.save(expected_task_state=[task_states.REBUILDING]) + + node_uuid = instance.node + node = self._get_node(node_uuid) + + self._add_instance_info_to_node(node, instance, image_meta, + instance.flavor, preserve_ephemeral) + + # Trigger the node rebuild/redeploy. + try: + self.ironicclient.call("node.set_provision_state", + node_uuid, ironic_states.REBUILD) + except (exception.NovaException, # Retry failed + ironic.exc.InternalServerError, # Validations + ironic.exc.BadRequest) as e: # Maintenance + msg = (_("Failed to request Ironic to rebuild instance " + "%(inst)s: %(reason)s") % {'inst': instance.uuid, + 'reason': six.text_type(e)}) + raise exception.InstanceDeployFailure(msg) + + # Although the target provision state is REBUILD, it will actually go + # to ACTIVE once the redeploy is finished. + timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active, + instance) + timer.start(interval=CONF.ironic.api_retry_interval).wait() + LOG.info('Instance was successfully rebuilt', instance=instance) + + def network_binding_host_id(self, context, instance): + """Get host ID to associate with network ports. + + This defines the binding:host_id parameter to the port-create calls for + Neutron. If using the neutron network interface (separate networks for + the control plane and tenants), return None here to indicate that the + port should not yet be bound; Ironic will make a port-update call to + Neutron later to tell Neutron to bind the port. + + NOTE: the late binding is important for security. If an ML2 mechanism + manages to connect the tenant network to the baremetal machine before + deployment is done (e.g. port-create time), then the tenant potentially + has access to the deploy agent, which may contain firmware blobs or + secrets. ML2 mechanisms may be able to connect the port without the + switchport info that comes from ironic, if they store that switchport + info for some reason. As such, we should *never* pass binding:host_id + in the port-create call when using the 'neutron' network_interface, + because a null binding:host_id indicates to Neutron that it should + not connect the port yet. 
+ + :param context: request context + :param instance: nova.objects.instance.Instance that the network + ports will be associated with + :returns: None + """ + # NOTE(vsaienko) Ironic will set binding:host_id later with port-update + # call when updating mac address or setting binding:profile + # to tell Neutron to bind the port. + return None + + def _get_node_console_with_reset(self, instance): + """Acquire console information for an instance. + + If the console is enabled, the console will be re-enabled + before returning. + + :param instance: nova instance + :return: a dictionary with below values + { 'node': ironic node + 'console_info': node console info } + :raise ConsoleNotAvailable: if console is unavailable + for the instance + """ + node = self._validate_instance_and_node(instance) + node_uuid = node.uuid + + def _get_console(): + """Request ironicclient to acquire node console.""" + try: + return self.ironicclient.call('node.get_console', node_uuid) + except (exception.NovaException, # Retry failed + ironic.exc.InternalServerError, # Validations + ironic.exc.BadRequest) as e: # Maintenance + LOG.error('Failed to acquire console information for ' + 'instance %(inst)s: %(reason)s', + {'inst': instance.uuid, 'reason': e}) + raise exception.ConsoleNotAvailable() + + def _wait_state(state): + """Wait for the expected console mode to be set on node.""" + console = _get_console() + if console['console_enabled'] == state: + raise loopingcall.LoopingCallDone(retvalue=console) + + _log_ironic_polling('set console mode', node, instance) + + # Return False to start backing off + return False + + def _enable_console(mode): + """Request ironicclient to enable/disable node console.""" + try: + self.ironicclient.call('node.set_console_mode', node_uuid, + mode) + except (exception.NovaException, # Retry failed + ironic.exc.InternalServerError, # Validations + ironic.exc.BadRequest) as e: # Maintenance + LOG.error('Failed to set console mode to "%(mode)s" ' + 'for instance %(inst)s: %(reason)s', + {'mode': mode, + 'inst': instance.uuid, + 'reason': e}) + raise exception.ConsoleNotAvailable() + + # Waiting for the console state to change (disabled/enabled) + try: + timer = loopingcall.BackOffLoopingCall(_wait_state, state=mode) + return timer.start( + starting_interval=_CONSOLE_STATE_CHECKING_INTERVAL, + timeout=CONF.ironic.serial_console_state_timeout, + jitter=0.5).wait() + except loopingcall.LoopingCallTimeOut: + LOG.error('Timeout while waiting for console mode to be ' + 'set to "%(mode)s" on node %(node)s', + {'mode': mode, + 'node': node_uuid}) + raise exception.ConsoleNotAvailable() + + # Acquire the console + console = _get_console() + + # NOTE: Resetting console is a workaround to force acquiring + # console when it has already been acquired by another user/operator. + # IPMI serial console does not support multi session, so + # resetting console will deactivate any active one without + # warning the operator. + if console['console_enabled']: + try: + # Disable console + _enable_console(False) + # Then re-enable it + console = _enable_console(True) + except exception.ConsoleNotAvailable: + # NOTE: We try to do recover on failure. + # But if recover fails, the console may remain in + # "disabled" state and cause any new connection + # will be refused. 
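+                # A second failure here is not caught: the
+                # ConsoleNotAvailable raised by _enable_console() simply
+                # propagates to the caller.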
+ console = _enable_console(True) + + if console['console_enabled']: + return {'node': node, + 'console_info': console['console_info']} + else: + LOG.debug('Console is disabled for instance %s', + instance.uuid) + raise exception.ConsoleNotAvailable() + + def get_serial_console(self, context, instance): + """Acquire serial console information. + + :param context: request context + :param instance: nova instance + :return: ConsoleSerial object + :raise ConsoleTypeUnavailable: if serial console is unavailable + for the instance + """ + LOG.debug('Getting serial console', instance=instance) + try: + result = self._get_node_console_with_reset(instance) + except exception.ConsoleNotAvailable: + raise exception.ConsoleTypeUnavailable(console_type='serial') + + node = result['node'] + console_info = result['console_info'] + + if console_info["type"] != "socat": + LOG.warning('Console type "%(type)s" (of ironic node ' + '%(node)s) does not support Nova serial console', + {'type': console_info["type"], + 'node': node.uuid}, + instance=instance) + raise exception.ConsoleTypeUnavailable(console_type='serial') + + # Parse and check the console url + url = urlparse.urlparse(console_info["url"]) + try: + scheme = url.scheme + hostname = url.hostname + port = url.port + if not (scheme and hostname and port): + raise AssertionError() + except (ValueError, AssertionError): + LOG.error('Invalid Socat console URL "%(url)s" ' + '(ironic node %(node)s)', + {'url': console_info["url"], + 'node': node.uuid}, + instance=instance) + raise exception.ConsoleTypeUnavailable(console_type='serial') + + if scheme == "tcp": + return console_type.ConsoleSerial(host=hostname, + port=port) + else: + LOG.warning('Socat serial console only supports "tcp". ' + 'This URL is "%(url)s" (ironic node %(node)s).', + {'url': console_info["url"], + 'node': node.uuid}, + instance=instance) + raise exception.ConsoleTypeUnavailable(console_type='serial') + + @property + def need_legacy_block_device_info(self): + return False + + def get_volume_connector(self, instance): + """Get connector information for the instance for attaching to volumes. + + Connector information is a dictionary representing the hardware + information that will be making the connection. This information + consists of properties for protocols supported by the hardware. + If the hardware supports iSCSI protocol, iSCSI initiator IQN is + included as follows:: + + { + 'ip': ip, + 'initiator': initiator, + 'host': hostname + } + + :param instance: nova instance + :returns: A connector information dictionary + """ + node = self.ironicclient.call("node.get", instance.node) + properties = self._parse_node_properties(node) + connectors = self.ironicclient.call("node.list_volume_connectors", + instance.node, detail=True) + values = {} + for conn in connectors: + values.setdefault(conn.type, []).append(conn.connector_id) + props = {} + + if values.get('ip'): + props['ip'] = props['host'] = values['ip'][0] + if values.get('iqn'): + props['initiator'] = values['iqn'][0] + if values.get('wwpn'): + props['wwpns'] = values['wwpn'] + if values.get('wwnn'): + props['wwnns'] = values['wwnn'] + props['platform'] = properties.get('cpu_arch') + props['os_type'] = 'baremetal' + + # Eventually it would be nice to be able to do multipath, but for now + # we should at least set the value to False. + props['multipath'] = False + return props
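Both the unprovision wait and the power-state waits above follow the same pattern: a FixedIntervalLoopingCall repeatedly invokes a closure that raises LoopingCallDone to stop and counts its attempts in a mutable dict. Below is a minimal, self-contained sketch of that pattern, assuming oslo.service is installed; poll_until, fetch_state, done_states and max_tries are illustrative names, not anything defined in this driver.

from oslo_service import loopingcall


def poll_until(fetch_state, done_states, max_tries, interval):
    """Poll fetch_state() until it reports a state in done_states."""
    data = {'tries': 0}  # a dict so the closure below can mutate it

    def _poll():
        state = fetch_state()
        if state in done_states:
            # Stop the loop; .wait() returns the retvalue.
            raise loopingcall.LoopingCallDone(retvalue=state)
        if data['tries'] >= max_tries:
            # An exception raised here stops the loop and is re-raised
            # by .wait() in the caller.
            raise RuntimeError('still waiting after %d tries' % data['tries'])
        data['tries'] += 1

    timer = loopingcall.FixedIntervalLoopingCall(_poll)
    return timer.start(interval=interval).wait()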
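reboot() and power_off() both attempt the soft variant of a power action first and fall back to the hard variant when the hardware driver rejects it. A reduced sketch of that fallback follows, where call stands in for the driver's ironicclient wrapper and unsupported_exc for the exception raised on unsupported soft actions (ironic.exc.BadRequest above); both are assumptions of this sketch.

def set_power_state_with_fallback(call, node_uuid, target, unsupported_exc):
    """Try the soft form of a power action first, then fall back to hard."""
    try:
        call("node.set_power_state", node_uuid, target, soft=True)
        return 'SOFT'
    except unsupported_exc:
        # Soft action not supported by the hardware driver; go hard.
        call("node.set_power_state", node_uuid, target)
        return 'HARD'

With the driver's wrapper this would look like set_power_state_with_fallback(self.ironicclient.call, node.uuid, 'reboot', ironic.exc.BadRequest), followed by the usual _wait_for_power_state polling.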
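In _plug_vifs(), a Conflict from Ironic means the VIF is already attached and is treated as success, while a BadRequest is surfaced as a plug failure. A standalone sketch of that behaviour, assuming python-ironicclient is installed; attach_vif and VifPlugError are illustrative stand-ins for the driver's method and nova's VirtualInterfacePlugException.

from ironicclient import exc as ironic_exc


class VifPlugError(Exception):
    pass


def attach_vif(call, node_uuid, vif_id):
    """Attach one VIF to a node, treating 'already attached' as success."""
    try:
        call("node.vif_attach", node_uuid, vif_id, retry_on_conflict=False)
    except ironic_exc.Conflict:
        # The VIF is already attached to this node; nothing to do.
        pass
    except ironic_exc.BadRequest as e:
        raise VifPlugError('cannot attach VIF %s to node %s: %s'
                           % (vif_id, node_uuid, e))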
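get_serial_console() only returns connection details for socat consoles exposed over tcp, after validating the URL Ironic reports. A sketch of that validation using the standard library's urllib.parse (the driver itself goes through the six.moves alias); parse_socat_url is an illustrative helper.

from urllib.parse import urlparse


def parse_socat_url(url):
    """Return (host, port) for a tcp:// socat console URL, else None."""
    parsed = urlparse(url)
    try:
        host, port = parsed.hostname, parsed.port  # .port may raise ValueError
    except ValueError:
        return None
    if parsed.scheme != 'tcp' or not (host and port):
        return None
    return host, port

For example, parse_socat_url('tcp://192.0.2.10:10000') yields ('192.0.2.10', 10000), while an http:// or malformed URL yields None.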
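get_volume_connector() first groups the node's volume connector records by type and then maps them onto the fields of the connector dictionary described in its docstring. A sketch of that grouping step, with plain (type, connector_id) tuples standing in for the ironicclient connector objects; build_connector_props is an illustrative name.

def build_connector_props(connectors, cpu_arch=None):
    """Build a connector dict from (type, connector_id) pairs."""
    values = {}
    for ctype, connector_id in connectors:
        values.setdefault(ctype, []).append(connector_id)

    props = {'platform': cpu_arch, 'os_type': 'baremetal', 'multipath': False}
    if values.get('ip'):
        props['ip'] = props['host'] = values['ip'][0]
    if values.get('iqn'):
        props['initiator'] = values['iqn'][0]
    if values.get('wwpn'):
        props['wwpns'] = values['wwpn']
    if values.get('wwnn'):
        props['wwnns'] = values['wwnn']
    return props

For example, build_connector_props([('ip', '192.0.2.20'), ('iqn', 'iqn.2017-01.org.example:node1')], cpu_arch='x86_64') produces a dict whose ip and host are 192.0.2.20 and whose initiator is the given IQN.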