Use COMPUTE_SAME_HOST_COLD_MIGRATE trait during migrate

This uses the COMPUTE_SAME_HOST_COLD_MIGRATE trait in the API during a
cold migration to filter out hosts that cannot support same-host cold
migration, which is all of them except for the hosts using the vCenter
driver.

For any nodes that do not report the trait, we won't know if they don't
because they don't support it or if they are not new enough to report
it, so the API has a service version check and will fallback to old
behavior using the config if the node is old. That compat code can be
removed in the next release.

As a result of this the FakeDriver capabilities are updated so the
FakeDriver no longer supports same-host cold migration and a new fake
driver is added to support that scenario for any tests that need it.

Change-Id: I7a4b951f3ab324c666ab924e6003d24cc8e539f5
Closes-Bug: #1748697
Related-Bug: #1811235
This commit is contained in:
Matt Riedemann 2019-11-20 10:27:18 -05:00 committed by Stephen Finucane
parent 9fa3600fca
commit 4921e822e7
10 changed files with 216 additions and 8 deletions

View File

@ -66,7 +66,7 @@ os-brick==2.6.2
os-client-config==1.29.0
os-resource-classes==0.4.0
os-service-types==1.7.0
os-traits==2.0.0
os-traits==2.1.0
os-vif==1.14.0
os-win==3.0.0
os-xenapi==0.3.3

View File

@ -25,6 +25,7 @@ import re
import string
from castellan import key_manager
import os_traits
from oslo_log import log as logging
from oslo_messaging import exceptions as oslo_exceptions
from oslo_serialization import base64 as base64utils
@ -103,6 +104,7 @@ AGGREGATE_ACTION_ADD = 'Add'
MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38
MIN_COMPUTE_CROSS_CELL_RESIZE = 47
MIN_COMPUTE_SAME_HOST_COLD_MIGRATE = 48
# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
@ -3881,8 +3883,7 @@ class API(base.Base):
validate_pci=True)
filter_properties = {'ignore_hosts': []}
if not CONF.allow_resize_to_same_host:
if not self._allow_resize_to_same_host(same_instance_type, instance):
filter_properties['ignore_hosts'].append(instance.host)
request_spec = objects.RequestSpec.get_by_instance_uuid(
@ -3931,6 +3932,57 @@ class API(base.Base):
request_spec=request_spec,
do_cast=True)
def _allow_resize_to_same_host(self, cold_migrate, instance):
"""Contains logic for excluding the instance.host on resize/migrate.
If performing a cold migration and the compute node resource provider
reports the COMPUTE_SAME_HOST_COLD_MIGRATE trait then same-host cold
migration is allowed otherwise it is not and the current instance.host
should be excluded as a scheduling candidate.
:param cold_migrate: true if performing a cold migration, false
for resize
:param instance: Instance object being resized or cold migrated
:returns: True if same-host resize/cold migrate is allowed, False
otherwise
"""
if cold_migrate:
# Check to see if the compute node resource provider on which the
# instance is running has the COMPUTE_SAME_HOST_COLD_MIGRATE
# trait.
# Note that we check this here in the API since we cannot
# pre-filter allocation candidates in the scheduler using this
# trait as it would not work. For example, libvirt nodes will not
# report the trait but using it as a forbidden trait filter when
# getting allocation candidates would still return libvirt nodes
# which means we could attempt to cold migrate to the same libvirt
# node, which would fail.
ctxt = instance._context
cn = objects.ComputeNode.get_by_host_and_nodename(
ctxt, instance.host, instance.node)
traits = self.placementclient.get_provider_traits(
ctxt, cn.uuid).traits
# If the provider has the trait it is (1) new enough to report that
# trait and (2) supports cold migration on the same host.
if os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE in traits:
allow_same_host = True
else:
# TODO(mriedem): Remove this compatibility code after one
# release. If the compute is old we will not know if it
# supports same-host cold migration so we fallback to config.
service = objects.Service.get_by_compute_host(ctxt, cn.host)
if service.version >= MIN_COMPUTE_SAME_HOST_COLD_MIGRATE:
# The compute is new enough to report the trait but does
# not so same-host cold migration is not allowed.
allow_same_host = False
else:
# The compute is not new enough to report the trait so we
# fallback to config.
allow_same_host = CONF.allow_resize_to_same_host
else:
allow_same_host = CONF.allow_resize_to_same_host
return allow_same_host
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.PAUSED, vm_states.SUSPENDED])

View File

@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 47
SERVICE_VERSION = 48
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -176,6 +176,8 @@ SERVICE_VERSION_HISTORY = (
# Version 47: Compute RPC v5.10:
# finish_revert_snapshot_based_resize_at_source
{'compute_rpc': '5.10'},
# Version 48: Drivers report COMPUTE_SAME_HOST_COLD_MIGRATE trait.
{'compute_rpc': '5.10'},
)

View File

@ -0,0 +1,122 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import mock
from nova.compute import api as compute_api
from nova import context as nova_context
from nova import objects
from nova import test
from nova.tests.functional import integrated_helpers
from nova.tests.unit import fake_notifier
class ColdMigrationDisallowSameHost(
integrated_helpers.ProviderUsageBaseTestCase):
"""Tests cold migrate where the source host does not have the
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
"""
compute_driver = 'fake.MediumFakeDriver'
def setUp(self):
super(ColdMigrationDisallowSameHost, self).setUp()
# Start one compute service which will use the fake virt driver
# which disallows cold migration to the same host.
self._start_compute('host1')
def _wait_for_migrate_no_valid_host(self, error='NoValidHost'):
event = fake_notifier.wait_for_versioned_notifications(
'compute_task.migrate_server.error')[0]
self.assertEqual(error,
event['payload']['nova_object.data']['reason'][
'nova_object.data']['exception'])
def test_cold_migrate_same_host_not_supported(self):
"""Simple test to show that you cannot cold-migrate to the same host
when the resource provider does not expose the
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
"""
server = self._create_server(networks='none')
# The fake driver does not report COMPUTE_SAME_HOST_COLD_MIGRATE
# so cold migration should fail since we only have one host.
self.api.post_server_action(server['id'], {'migrate': None})
self._wait_for_migrate_no_valid_host()
def test_cold_migrate_same_host_old_compute_disallow(self):
"""Upgrade compat test where the resource provider does not report
the COMPUTE_SAME_HOST_COLD_MIGRATE trait but the compute service is
old so the API falls back to the allow_resize_to_same_host config which
defaults to False.
"""
server = self._create_server(networks='none')
# Stub the compute service version check to make the compute service
# appear old.
fake_service = objects.Service()
fake_service.version = (
compute_api.MIN_COMPUTE_SAME_HOST_COLD_MIGRATE - 1)
with mock.patch('nova.objects.Service.get_by_compute_host',
return_value=fake_service) as mock_get_service:
self.api.post_server_action(server['id'], {'migrate': None})
mock_get_service.assert_called_once_with(
test.MatchType(nova_context.RequestContext), 'host1')
# Since allow_resize_to_same_host defaults to False scheduling failed
# since there are no other hosts.
self._wait_for_migrate_no_valid_host()
def test_cold_migrate_same_host_old_compute_allow(self):
"""Upgrade compat test where the resource provider does not report
the COMPUTE_SAME_HOST_COLD_MIGRATE trait but the compute service is
old so the API falls back to the allow_resize_to_same_host config which
in this test is set to True.
"""
self.flags(allow_resize_to_same_host=True)
server = self._create_server(networks='none')
# Stub the compute service version check to make the compute service
# appear old.
fake_service = objects.Service()
fake_service.version = (
compute_api.MIN_COMPUTE_SAME_HOST_COLD_MIGRATE - 1)
with mock.patch('nova.objects.Service.get_by_compute_host',
return_value=fake_service) as mock_get_service:
self.api.post_server_action(server['id'], {'migrate': None})
mock_get_service.assert_called_once_with(
test.MatchType(nova_context.RequestContext), 'host1')
# In this case the compute is old so the API falls back to checking
# allow_resize_to_same_host which says same-host cold migrate is
# allowed so the scheduler sends the request to the only compute
# available but the virt driver says same-host cold migrate is not
# supported and raises UnableToMigrateToSelf. A reschedule is sent
# to conductor which results in MaxRetriesExceeded since there are no
# alternative hosts.
self._wait_for_migrate_no_valid_host(error='MaxRetriesExceeded')
class ColdMigrationAllowSameHost(
integrated_helpers.ProviderUsageBaseTestCase):
"""Tests cold migrate where the source host has the
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
"""
compute_driver = 'fake.SameHostColdMigrateDriver'
def setUp(self):
super(ColdMigrationAllowSameHost, self).setUp()
self._start_compute('host1')
def test_cold_migrate_same_host_supported(self):
"""Simple test to show that you can cold-migrate to the same host
when the resource provider exposes the COMPUTE_SAME_HOST_COLD_MIGRATE
trait.
"""
server = self._create_server(networks='none')
self.api.post_server_action(server['id'], {'migrate': None})
server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])

View File

@ -1741,6 +1741,7 @@ class _ComputeAPIUnitTestMixIn(object):
@mock.patch('nova.compute.api.API.get_instance_host_status',
new=mock.Mock(return_value=fields_obj.HostStatus.UP))
@mock.patch('nova.compute.api.API._allow_resize_to_same_host')
@mock.patch('nova.compute.utils.is_volume_backed_instance',
return_value=False)
@mock.patch('nova.compute.api.API._validate_flavor_image_nostatus')
@ -1757,6 +1758,7 @@ class _ComputeAPIUnitTestMixIn(object):
mock_get_by_instance_uuid, mock_get_flavor, mock_upsize,
mock_inst_save, mock_count, mock_limit, mock_record,
mock_migration, mock_validate, mock_is_vol_backed,
mock_allow_resize_to_same_host,
flavor_id_passed=True,
same_host=False, allow_same_host=False,
project_id=None,
@ -1768,6 +1770,7 @@ class _ComputeAPIUnitTestMixIn(object):
allow_cross_cell_resize=False):
self.flags(allow_resize_to_same_host=allow_same_host)
mock_allow_resize_to_same_host.return_value = allow_same_host
params = {}
if project_id is not None:
@ -1871,6 +1874,7 @@ class _ComputeAPIUnitTestMixIn(object):
self.assertEqual(
allow_cross_cell_resize,
fake_spec.requested_destination.allow_cross_cell_move)
mock_allow_resize_to_same_host.assert_called_once()
if host_name:
mock_get_all_by_host.assert_called_once_with(

View File

@ -7705,6 +7705,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
super(ComputeManagerMigrationTestCase, self).setUp()
fake_notifier.stub_notifier(self)
self.addCleanup(fake_notifier.reset)
self.flags(compute_driver='fake.SameHostColdMigrateDriver')
self.compute = manager.ComputeManager()
self.context = context.RequestContext(fakes.FAKE_USER_ID,
fakes.FAKE_PROJECT_ID)
@ -9658,7 +9659,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
@mock.patch.object(objects.Migration, 'get_by_id')
@mock.patch.object(nova.virt.fake.SmallFakeDriver, 'live_migration_abort')
@mock.patch.object(nova.virt.fake.FakeDriver, 'live_migration_abort')
@mock.patch('nova.compute.utils.notify_about_instance_action')
def test_live_migration_abort(self, mock_notify_action, mock_driver,
mock_get_migration, mock_notify):
@ -9711,7 +9712,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase,
@mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
@mock.patch.object(objects.Migration, 'get_by_id')
@mock.patch.object(nova.virt.fake.SmallFakeDriver, 'live_migration_abort')
@mock.patch.object(nova.virt.fake.FakeDriver, 'live_migration_abort')
@mock.patch('nova.compute.utils.notify_about_instance_action')
def test_live_migration_abort_not_supported(self, mock_notify_action,
mock_driver,

View File

@ -124,6 +124,9 @@ CAPABILITY_TRAITS_MAP = {
"supports_image_type_vmdk": os_traits.COMPUTE_IMAGE_TYPE_VMDK,
# Added in os-traits 2.0.0
"supports_image_type_ploop": os_traits.COMPUTE_IMAGE_TYPE_PLOOP,
# Added in os-traits 2.1.0.
"supports_migrate_to_same_host": os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE,
}

View File

@ -104,7 +104,7 @@ class FakeDriver(driver.ComputeDriver):
capabilities = {
"has_imagecache": True,
"supports_evacuate": True,
"supports_migrate_to_same_host": True,
"supports_migrate_to_same_host": False,
"supports_attach_interface": True,
"supports_device_tagging": True,
"supports_tagged_attach_interface": True,
@ -702,6 +702,12 @@ class MediumFakeDriver(FakeDriver):
local_gb = 1028
class SameHostColdMigrateDriver(MediumFakeDriver):
"""MediumFakeDriver variant that supports same-host cold migrate."""
capabilities = dict(FakeDriver.capabilities,
supports_migrate_to_same_host=True)
class PowerUpdateFakeDriver(SmallFakeDriver):
# A specific fake driver for the power-update external event testing.

View File

@ -0,0 +1,18 @@
---
fixes:
- |
This release contains a fix for `bug 1748697`_ which distinguishes between
resize and cold migrate such that the ``allow_resize_to_same_host`` config
option is no longer used to determine if a server can be cold migrated to
the same compute service host. Now when requesting a cold migration the
API will check if the source compute node resource provider has the
``COMPUTE_SAME_HOST_COLD_MIGRATE`` trait and if so the scheduler can select
the source host. Note that the only in-tree compute driver that supports
cold migration to the same host is ``VMwareVCDriver``.
To support rolling upgrades with N-1 computes if a node does not report the
trait and is old the API will fallback to the ``allow_resize_to_same_host``
config option value. That compatibility will be removed in a subsequent
release.
.. _bug 1748697: https://launchpad.net/bugs/1748697

View File

@ -55,7 +55,7 @@ psutil>=3.2.2 # BSD
oslo.versionedobjects>=1.35.0 # Apache-2.0
os-brick>=2.6.2 # Apache-2.0
os-resource-classes>=0.4.0 # Apache-2.0
os-traits>=2.0.0 # Apache-2.0
os-traits>=2.1.0 # Apache-2.0
os-vif>=1.14.0 # Apache-2.0
os-win>=3.0.0 # Apache-2.0
castellan>=0.16.0 # Apache-2.0