220 lines
9.5 KiB
Python
220 lines
9.5 KiB
Python
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
|
|
|
|
from oslo_log import log as logging
|
|
import oslo_messaging as messaging
|
|
import six
|
|
|
|
from nova.compute import power_state
|
|
from nova.conductor.tasks import base
|
|
import nova.conf
|
|
from nova import exception
|
|
from nova.i18n import _
|
|
from nova import objects
|
|
from nova.scheduler import utils as scheduler_utils
|
|
from nova import utils
|
|
|
|
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Shared configuration object; read below for ``migrate_max_retries``.
CONF = nova.conf.CONF
|
|
|
|
|
|
class LiveMigrationTask(base.TaskBase):
    """Conductor task that validates and then starts a live migration.

    The task verifies the instance power state and the source host, either
    validates the requested destination or asks the scheduler for one, and
    finally calls ``compute_rpcapi.live_migration`` with the source host.
    """

    def __init__(self, context, instance, destination,
                 block_migration, disk_over_commit, migration, compute_rpcapi,
                 servicegroup_api, scheduler_client, request_spec=None):
        """Store the migration parameters and the collaborating clients.

        :param context: request context forwarded to every lookup/RPC call
        :param instance: instance to migrate; ``instance.host`` is recorded
                         as the source host
        :param destination: requested destination host; a falsy value means
                            the scheduler is asked to select one
        :param block_migration: forwarded to the destination pre-check and
                                to the live migration call
        :param disk_over_commit: forwarded to the destination pre-check
        :param migration: migration record; updated with the selected
                          destination and marked failed when retries run out
        :param compute_rpcapi: client used for the destination pre-check and
                               for starting the live migration
        :param servicegroup_api: client used to test whether a compute
                                 service is up
        :param scheduler_client: client used to select a destination host
        :param request_spec: optional existing RequestSpec; when absent one
                             is built from the instance
        """
        super(LiveMigrationTask, self).__init__(context, instance)
        self.destination = destination
        self.block_migration = block_migration
        self.disk_over_commit = disk_over_commit
        self.migration = migration
        self.source = instance.host
        # Filled in by _call_livem_checks_on_host() with the result of the
        # destination pre-check.
        self.migrate_data = None

        self.compute_rpcapi = compute_rpcapi
        self.servicegroup_api = servicegroup_api
        self.scheduler_client = scheduler_client
        self.request_spec = request_spec

    def _execute(self):
        """Run the pre-checks, settle the destination and start migrating.

        :returns: whatever ``compute_rpcapi.live_migration`` returns
        """
        self._check_instance_is_active()
        self._check_host_is_up(self.source)

        if not self.destination:
            # No host requested: let the scheduler choose and record the
            # choice on the migration record.
            self.destination = self._find_destination()
            self.migration.dest_compute = self.destination
            self.migration.save()
        else:
            self._check_requested_destination()

        # TODO(johngarbutt) need to move complexity out of compute manager
        # TODO(johngarbutt) disk_over_commit?
        return self.compute_rpcapi.live_migration(
            self.context,
            host=self.source,
            instance=self.instance,
            dest=self.destination,
            block_migration=self.block_migration,
            migration=self.migration,
            migrate_data=self.migrate_data)

    def rollback(self):
        """Do nothing; there is no clean-up step yet."""
        # TODO(johngarbutt) need to implement the clean up operation
        # but this will make sense only once we pull in the compute
        # calls, since this class currently makes no state changes,
        # except to call the compute method, that has no matching
        # rollback call right now.
        pass

    def _check_instance_is_active(self):
        """Require the instance to be RUNNING or PAUSED.

        :raises: exception.InstanceInvalidState for any other power state
        """
        allowed_states = (power_state.RUNNING, power_state.PAUSED)
        if self.instance.power_state not in allowed_states:
            raise exception.InstanceInvalidState(
                instance_uuid=self.instance.uuid,
                attr='power_state',
                state=self.instance.power_state,
                method='live migrate')

    def _check_host_is_up(self, host):
        """Require the compute service on ``host`` to be reported as up.

        :param host: compute host name to look up
        :raises: exception.ComputeServiceUnavailable if the service group
                 API does not report the service as up
        """
        service = objects.Service.get_by_compute_host(self.context, host)

        if not self.servicegroup_api.service_is_up(service):
            raise exception.ComputeServiceUnavailable(host=host)

    def _check_requested_destination(self):
        """Run every pre-check against the caller-requested destination."""
        self._check_destination_is_not_source()
        self._check_host_is_up(self.destination)
        self._check_destination_has_enough_memory()
        self._check_compatible_with_source_hypervisor(self.destination)
        self._call_livem_checks_on_host(self.destination)

    def _check_destination_is_not_source(self):
        """Reject a destination equal to the source host.

        :raises: exception.UnableToMigrateToSelf
        """
        if self.destination == self.source:
            raise exception.UnableToMigrateToSelf(
                instance_id=self.instance.uuid, host=self.destination)

    def _check_destination_has_enough_memory(self):
        """Require the destination to have room for the instance memory.

        :raises: exception.MigrationPreCheckError if the instance reports
                 no memory size or the computed available memory is not
                 strictly greater than the instance memory
        """
        compute = self._get_compute_info(self.destination)
        free_ram_mb = compute.free_ram_mb
        total_ram_mb = compute.memory_mb
        mem_inst = self.instance.memory_mb
        # NOTE(sbauza): Now the ComputeNode object reports an allocation ratio
        # that can be provided by the compute_node if new or by the controller
        ram_ratio = compute.ram_allocation_ratio

        # NOTE(sbauza): Mimic the RAMFilter logic in order to have the same
        # ram validation
        # Over-committed capacity minus what is already in use.
        avail = total_ram_mb * ram_ratio - (total_ram_mb - free_ram_mb)
        # NOTE(review): an exact fit (avail == mem_inst) is rejected here;
        # confirm that this boundary matches the scheduler filter.
        if not mem_inst or avail <= mem_inst:
            instance_uuid = self.instance.uuid
            dest = self.destination
            reason = _("Unable to migrate %(instance_uuid)s to %(dest)s: "
                       "Lack of memory(host:%(avail)s <= "
                       "instance:%(mem_inst)s)")
            raise exception.MigrationPreCheckError(reason=reason % dict(
                instance_uuid=instance_uuid, dest=dest, avail=avail,
                mem_inst=mem_inst))

    def _get_compute_info(self, host):
        """Return the ComputeNode looked up for ``host``.

        :param host: compute host name
        """
        return objects.ComputeNode.get_first_node_by_host_for_old_compat(
            self.context, host)

    def _check_compatible_with_source_hypervisor(self, destination):
        """Compare hypervisor type and version of source and destination.

        :param destination: candidate destination host name
        :raises: exception.InvalidHypervisorType if the types differ
        :raises: exception.DestinationHypervisorTooOld if the source
                 hypervisor version is greater than the destination's
        """
        source_info = self._get_compute_info(self.source)
        destination_info = self._get_compute_info(destination)

        source_type = source_info.hypervisor_type
        destination_type = destination_info.hypervisor_type
        if source_type != destination_type:
            raise exception.InvalidHypervisorType()

        source_version = source_info.hypervisor_version
        destination_version = destination_info.hypervisor_version
        if source_version > destination_version:
            raise exception.DestinationHypervisorTooOld()

    def _call_livem_checks_on_host(self, destination):
        """Run the destination pre-check RPC and keep its result.

        The result is stored in ``self.migrate_data`` and later passed to
        the live migration call.

        :param destination: candidate destination host name
        :raises: exception.MigrationPreCheckError if the RPC times out
        """
        try:
            self.migrate_data = (
                self.compute_rpcapi.check_can_live_migrate_destination(
                    self.context, self.instance, destination,
                    self.block_migration, self.disk_over_commit))
        except messaging.MessagingTimeout:
            msg = _("Timeout while checking if we can live migrate to host: "
                    "%s") % destination
            # NOTE(review): the message is passed positionally here, unlike
            # the ``reason=`` keyword used for the memory pre-check; kept
            # as-is so the resulting error text does not change.
            raise exception.MigrationPreCheckError(msg)

    def _find_destination(self):
        """Ask the scheduler for a destination until one passes the checks.

        Hosts that fail the hypervisor compatibility check or the
        destination pre-check are added to the ignore list and the
        scheduler is asked again, bounded by _check_not_over_max_retries().

        :returns: the name of the selected destination host
        :raises: exception.MigrationSchedulerRPCError if the scheduler call
                 fails with a RemoteError
        :raises: exception.MaxRetriesExceeded if too many hosts were tried
        """
        # TODO(johngarbutt) this retry loop should be shared
        attempted_hosts = [self.source]
        image = utils.get_image_from_system_metadata(
            self.instance.system_metadata)
        filter_properties = {'ignore_hosts': attempted_hosts}
        # TODO(sbauza): Remove that once setup_instance_group() accepts a
        # RequestSpec object
        # This dict is only an input to setup_instance_group(); the spec
        # handed to the scheduler is chosen below.
        legacy_spec = {'instance_properties': {'uuid': self.instance.uuid}}
        scheduler_utils.setup_instance_group(self.context, legacy_spec,
                                             filter_properties)
        if not self.request_spec:
            # NOTE(sbauza): We were unable to find an original RequestSpec
            # object - probably because the instance is old.
            # We need to mock that the old way
            request_spec = objects.RequestSpec.from_components(
                self.context, self.instance.uuid, image,
                self.instance.flavor, self.instance.numa_topology,
                self.instance.pci_requests,
                filter_properties, None, self.instance.availability_zone
            )
        else:
            request_spec = self.request_spec
            # NOTE(sbauza): Force_hosts/nodes needs to be reset
            # if we want to make sure that the next destination
            # is not forced to be the original host
            request_spec.reset_forced_destinations()

        host = None
        while host is None:
            self._check_not_over_max_retries(attempted_hosts)
            request_spec.ignore_hosts = attempted_hosts
            try:
                host = self.scheduler_client.select_destinations(
                    self.context, request_spec)[0]['host']
            except messaging.RemoteError as ex:
                # TODO(ShaoHe Feng) There maybe multi-scheduler, and the
                # scheduling algorithm is R-R, we can let other scheduler try.
                # Note(ShaoHe Feng) There are types of RemoteError, such as
                # NoSuchMethod, UnsupportedVersion, we can distinguish it by
                # ex.exc_type.
                raise exception.MigrationSchedulerRPCError(
                    reason=six.text_type(ex))
            try:
                self._check_compatible_with_source_hypervisor(host)
                self._call_livem_checks_on_host(host)
            except (exception.Invalid, exception.MigrationPreCheckError) as e:
                LOG.debug("Skipping host: %(host)s because: %(e)s",
                          {"host": host, "e": e})
                # Remember the rejected host and loop for another candidate.
                attempted_hosts.append(host)
                host = None
        return host

    def _check_not_over_max_retries(self, attempted_hosts):
        """Abort once more hosts were retried than the configuration allows.

        ``CONF.migrate_max_retries == -1`` disables the limit. Otherwise the
        retry count is the number of attempted hosts minus one (the list
        starts with the source host). When the limit is exceeded the
        migration record, if any, is marked ``failed`` before raising.

        :param attempted_hosts: hosts excluded so far, source host included
        :raises: exception.MaxRetriesExceeded when the limit is exceeded
        """
        max_retries = CONF.migrate_max_retries
        if max_retries == -1:
            return

        retries = len(attempted_hosts) - 1
        if retries > max_retries:
            if self.migration:
                self.migration.status = 'failed'
                self.migration.save()
            # Report the configured limit; the attempt count is always one
            # past it when this branch is reached.
            msg = (_('Exceeded max scheduling retries %(max_retries)d for '
                     'instance %(instance_uuid)s during live migration')
                   % {'max_retries': max_retries,
                      'instance_uuid': self.instance.uuid})
            raise exception.MaxRetriesExceeded(reason=msg)
|