233 lines
9.4 KiB
Python
233 lines
9.4 KiB
Python
# Copyright 2016 Cloudbase Solutions Srl
|
|
# All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
"""Management class for Cluster VM operations."""
|
|
|
|
import functools
|
|
|
|
from nova.compute import power_state
|
|
from nova.compute import task_states
|
|
from nova.compute import vm_states
|
|
from nova import context
|
|
from nova import network
|
|
from nova import objects
|
|
from nova import utils
|
|
from nova.virt import block_device
|
|
from os_win import exceptions as os_win_exc
|
|
from os_win import utilsfactory
|
|
from oslo_log import log as logging
|
|
|
|
import compute_hyperv.nova.conf
|
|
from compute_hyperv.nova import hostops
|
|
from compute_hyperv.nova import serialconsoleops
|
|
from compute_hyperv.nova import vmops
|
|
|
|
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
|
|
# Configuration object exported by compute_hyperv.nova.conf; this module
# reads the cluster failover options from its ``hyperv`` group.
CONF = compute_hyperv.nova.conf.CONF
|
|
|
|
|
|
class ClusterOps(object):
    """Cluster related VM operations performed by this compute node.

    The class adds instances to / removes instances from the cluster and
    handles owner changes (failovers) of clustered VMs, updating the
    instance host in nova and the ports in neutron accordingly.
    """

    def __init__(self):
        self._clustutils = utilsfactory.get_clusterutils()
        self._vmutils = utilsfactory.get_vmutils()
        self._clustutils.check_cluster_state()
        # Cache mapping instance names to instance uuids. Failover events
        # only carry the instance name, see _get_instance_by_name.
        self._instance_map = {}

        self._this_node = hostops.HostOps.get_hostname()

        self._context = context.get_admin_context()
        self._network_api = network.API()
        self._vmops = vmops.VMOps()
        self._serial_console_ops = serialconsoleops.SerialConsoleOps()

    def get_instance_host(self, instance):
        """Return the VM host reported by the cluster utils for instance."""
        return self._clustutils.get_vm_host(instance.name)

    def add_to_cluster(self, instance):
        """Add the instance VM to the cluster and cache its uuid.

        The failover settings are taken from the ``hyperv`` config group.
        A HyperVClusterException is logged and not re-raised; in that case
        the instance is not added to the name -> uuid cache.

        :param instance: the nova instance to be clustered.
        """
        try:
            self._clustutils.add_vm_to_cluster(
                instance.name, CONF.hyperv.max_failover_count,
                CONF.hyperv.failover_period, CONF.hyperv.auto_failback)
            self._instance_map[instance.name] = instance.uuid
        except os_win_exc.HyperVClusterException:
            LOG.exception('Adding instance to cluster failed.',
                          instance=instance)

    def remove_from_cluster(self, instance):
        """Remove the instance VM from the cluster, if it is clustered.

        The cached name -> uuid entry is dropped as well. A
        HyperVClusterException is logged and not re-raised.

        :param instance: the nova instance to be removed from the cluster.
        """
        try:
            if self._clustutils.vm_exists(instance.name):
                self._clustutils.delete(instance.name)
            self._instance_map.pop(instance.name, None)
        except os_win_exc.HyperVClusterException:
            LOG.exception('Removing instance from cluster failed.',
                          instance=instance)

    def post_migration(self, instance):
        """Record the instance in the name -> uuid cache."""
        # update instance cache
        self._instance_map[instance.name] = instance.uuid

    def start_failover_listener_daemon(self):
        """Start the daemon failover listener."""

        listener = self._clustutils.get_vm_owner_change_listener_v2()
        # Each notification is dispatched through utils.spawn_n instead of
        # being handled inline by the listener. The listener is presumably
        # calling back with (instance_name, new_host), which is what
        # _failover_migrate accepts -- verify against os-win.
        cbk = functools.partial(utils.spawn_n, self._failover_migrate)

        utils.spawn_n(listener, cbk)

    def reclaim_failovered_instances(self):
        """Reclaim the instances that are on this node but owned by another.

        Every instance found on this hypervisor whose nova host is not this
        node is passed to _failover_migrate, using this node as the new
        host.
        """
        # NOTE(claudiub): some instances might have failovered while the
        # nova-compute service was down. Those instances will have to be
        # reclaimed by this node.
        expected_attrs = ['id', 'uuid', 'name', 'host']
        host_instance_uuids = self._vmops.list_instance_uuids()
        nova_instances = self._get_nova_instances(expected_attrs,
                                                  host_instance_uuids)

        # filter out instances that are known to be on this host.
        this_node = self._this_node.upper()
        nova_instances = [instance for instance in nova_instances
                          if instance.host.upper() != this_node]

        for instance in nova_instances:
            # _failover_migrate accepts (instance_name, new_host) and reads
            # the old host from the instance itself. Passing instance.host
            # as an extra positional argument would make every spawned
            # call fail with a TypeError.
            utils.spawn_n(self._failover_migrate,
                          instance.name,
                          self._this_node)

    def _failover_migrate(self, instance_name, new_host):
        """This method will check if the generated event is a legitimate
        failover to this node. If it is, it will proceed to prepare the
        failovered VM if necessary and update the owner of the compute vm in
        nova and ports in neutron.

        :param instance_name: the name of the VM that changed its owner.
        :param new_host: the name of the node that owns the VM now.
        """
        instance = self._get_instance_by_name(instance_name)
        if not instance:
            # Some instances on the hypervisor may not be tracked by nova
            LOG.debug('Instance %s does not exist in nova. Skipping.',
                      instance_name)
            return

        # The host recorded in nova is the one the instance moved away from.
        old_host = instance.host
        LOG.info('Checking instance failover %(instance)s to %(new_host)s '
                 'from host %(old_host)s.',
                 {'instance': instance_name,
                  'new_host': new_host,
                  'old_host': old_host})

        if instance.task_state == task_states.MIGRATING:
            # In case of live migration triggered by the user, we get the
            # event that the instance changed host but we do not want
            # to treat it as a failover.
            LOG.debug('Instance %s is live migrating.', instance_name)
            return

        nw_info = self._network_api.get_instance_nw_info(self._context,
                                                         instance)
        # Host names are compared case insensitively.
        if old_host and old_host.upper() == self._this_node.upper():
            LOG.debug('Actions at source node.')
            self._vmops.unplug_vifs(instance, nw_info)
            return
        elif new_host.upper() != self._this_node.upper():
            LOG.debug('Instance %s did not failover to this node.',
                      instance_name)
            return

        LOG.info('Handling instance %(instance)s failover to this host.',
                 {'instance': instance_name})

        self._nova_failover_server(instance, new_host)
        self._failover_migrate_networks(instance, old_host)
        self._vmops.plug_vifs(instance, nw_info)
        self._serial_console_ops.start_console_handler(instance_name)

    def _failover_migrate_networks(self, instance, source):
        """This is called after a VM failovered to this node.
        This will change the owner of the neutron ports to this node.

        :param instance: the nova instance that failovered.
        :param source: the name of the host the instance moved away from.
        """
        migration = {'source_compute': source,
                     'dest_compute': self._this_node, }

        self._network_api.setup_networks_on_host(
            self._context, instance, self._this_node)
        self._network_api.migrate_instance_start(
            self._context, instance, migration)
        self._network_api.setup_networks_on_host(
            self._context, instance, self._this_node)
        self._network_api.migrate_instance_finish(
            self._context, instance, migration)
        self._network_api.setup_networks_on_host(
            self._context, instance, self._this_node)
        self._network_api.setup_networks_on_host(
            self._context, instance, source, teardown=True)

    def _get_instance_by_name(self, instance_name):
        """Return the nova instance having the given name.

        :param instance_name: the name of the VM.
        :returns: the instance object, or None if no uuid could be found
                  for the given name.
        """
        # Since from a failover we only get the instance name
        # we need to find its uuid so we can retrieve the instance
        # object from nova. We keep a map from the instance name to the
        # instance uuid. First we try to get the vm uuid from that map
        # if it's not there, we try to get it from the instance notes,
        # this may fail (during a failover for example, the vm will not
        # be at the source node anymore) and lastly we try and get the
        # vm uuid from the database.
        vm_uuid = self._instance_map.get(instance_name)
        if not vm_uuid:
            try:
                vm_uuid = self._vmutils.get_instance_uuid(instance_name)
                self._instance_map[instance_name] = vm_uuid
            except os_win_exc.HyperVVMNotFoundException:
                pass

        if not vm_uuid:
            self._update_instance_map()
            vm_uuid = self._instance_map.get(instance_name)

        if not vm_uuid:
            LOG.debug("Instance %s cannot be found in Nova.", instance_name)
            return None

        return objects.Instance.get_by_uuid(self._context, vm_uuid)

    def _update_instance_map(self):
        """Refresh the name -> uuid cache using the instances from nova."""
        for server in self._get_nova_instances():
            self._instance_map[server.name] = server.uuid

    def _get_nova_instances(self, expected_attrs=None, instance_uuids=None):
        """Retrieve the instances that are not deleted from nova.

        :param expected_attrs: the attributes requested for each instance;
                               ['id', 'uuid', 'name'] is used if omitted.
        :param instance_uuids: if not None, it is added to the query
                               filters under the 'uuid' key.
        :returns: the result of objects.InstanceList.get_by_filters.
        """
        if not expected_attrs:
            expected_attrs = ['id', 'uuid', 'name']

        filters = {'deleted': False}
        if instance_uuids is not None:
            filters['uuid'] = instance_uuids

        return objects.InstanceList.get_by_filters(
            self._context, filters, expected_attrs=expected_attrs)

    def _get_instance_block_device_mappings(self, instance):
        """Transform block devices to the driver block_device format."""
        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
            self._context, instance.uuid)
        return [block_device.DriverVolumeBlockDevice(bdm) for bdm in bdms]

    def _nova_failover_server(self, instance, new_host):
        """Update the instance host and node in nova after a failover.

        :param instance: the nova instance that failovered.
        :param new_host: the name of the node that owns the instance now,
                         stored as both the instance host and node.
        """
        if instance.vm_state == vm_states.ERROR:
            # Sometimes during a failover nova can set the instance state
            # to error depending on how much time the failover takes.
            instance.vm_state = vm_states.ACTIVE
        if instance.power_state == power_state.NOSTATE:
            instance.power_state = power_state.RUNNING

        instance.host = new_host
        instance.node = new_host
        instance.save(expected_task_state=[None])
|