diff --git a/compute_hyperv/nova/cluster/clusterops.py b/compute_hyperv/nova/cluster/clusterops.py
index 4e5c2780..86ad3de6 100644
--- a/compute_hyperv/nova/cluster/clusterops.py
+++ b/compute_hyperv/nova/cluster/clusterops.py
@@ -36,6 +36,7 @@ import compute_hyperv.nova.conf
 from compute_hyperv.nova import coordination
 from compute_hyperv.nova import hostops
 from compute_hyperv.nova import serialconsoleops
+from compute_hyperv.nova.utils import placement as placement_utils
 from compute_hyperv.nova import vmops
 
 LOG = logging.getLogger(__name__)
@@ -56,6 +57,7 @@ class ClusterOps(object):
         self._network_api = network.API()
         self._vmops = vmops.VMOps()
         self._serial_console_ops = serialconsoleops.SerialConsoleOps()
+        self._placement = placement_utils.PlacementUtils()
 
     def get_instance_host(self, instance):
         return self._clustutils.get_vm_host(instance.name)
@@ -179,6 +181,13 @@ class ClusterOps(object):
         self._nova_failover_server(instance, new_host)
         if host_changed:
             self._failover_migrate_networks(instance, old_host)
+            try:
+                self._placement.move_compute_node_allocations(
+                    self._context, instance, old_host, new_host,
+                    merge_existing=False)
+            except Exception:
+                LOG.exception("Could not update failed over instance '%s' "
+                              "allocations.", instance)
 
         self._vmops.plug_vifs(instance, nw_info)
         self._serial_console_ops.start_console_handler(instance_name)
diff --git a/compute_hyperv/nova/utils/__init__.py b/compute_hyperv/nova/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/compute_hyperv/nova/utils/placement.py b/compute_hyperv/nova/utils/placement.py
new file mode 100644
index 00000000..7d588958
--- /dev/null
+++ b/compute_hyperv/nova/utils/placement.py
@@ -0,0 +1,110 @@
+# Copyright 2018 Cloudbase Solutions Srl
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+from nova import exception
+from nova import objects
+from nova.scheduler.client import report
+from nova.scheduler import utils as scheduler_utils
+from oslo_log import log as logging
+
+LOG = logging.getLogger(__name__)
+
+SYMMETRIC_GET_PUT_ALLOCATIONS = "1.12"  # Queens
+
+
+class PlacementUtils(object):
+    def __init__(self):
+        self.reportclient = report.SchedulerReportClient()
+
+    def move_compute_node_allocations(self, context, instance, old_host,
+                                      new_host, merge_existing=True):
+        LOG.info("Moving instance allocations from compute node %s to %s.",
+                 old_host, new_host, instance=instance)
+
+        cn_uuid = objects.ComputeNode.get_by_host_and_nodename(
+            context, old_host, old_host).uuid
+        new_cn_uuid = objects.ComputeNode.get_by_host_and_nodename(
+            context, new_host, new_host).uuid
+
+        self.move_allocations(context, instance.uuid, cn_uuid,
+                              new_cn_uuid,
+                              merge_existing=merge_existing)
+
+    @report.retries
+    def move_allocations(self, context, consumer_uuid, old_rp_uuid,
+                         new_rp_uuid, merge_existing=True):
+        allocs = self._get_allocs_for_consumer(
+            context, consumer_uuid,
+            version=SYMMETRIC_GET_PUT_ALLOCATIONS)
+        allocations = allocs['allocations']
+
+        if old_rp_uuid == new_rp_uuid:
+            LOG.debug("Requested to move allocations to the "
+                      "same provider: %s.", old_rp_uuid)
+            return
+
+        if old_rp_uuid not in allocations:
+            LOG.warning("Expected to find allocations referencing resource "
+                        "provider %s for %s, but found none.",
+                        old_rp_uuid, consumer_uuid)
+            return
+
+        if merge_existing and new_rp_uuid in allocations:
+            LOG.info("Merging existing allocations for consumer %s on "
+                     "provider %s: %s.",
+                     consumer_uuid, new_rp_uuid, allocations)
+            scheduler_utils.merge_resources(
+                allocations[new_rp_uuid]['resources'],
+                allocations[old_rp_uuid]['resources'])
+        else:
+            if new_rp_uuid in allocations:
+                LOG.info("Replacing existing allocations for consumer %s "
+                         "on provider %s: %s",
+                         consumer_uuid, new_rp_uuid, allocations)
+
+            allocations[new_rp_uuid] = allocations[old_rp_uuid]
+
+        del allocations[old_rp_uuid]
+        self._put_allocs(context, consumer_uuid, allocs,
+                         version=SYMMETRIC_GET_PUT_ALLOCATIONS)
+
+    def _put_allocs(self, context, consumer_uuid, allocations, version=None):
+        url = '/allocations/%s' % consumer_uuid
+        r = self.reportclient.put(url, allocations,
+                                  version=version)
+        if r.status_code != 204:
+            errors = r.json().get('errors') or []
+            # NOTE(jaypipes): Yes, it sucks doing string comparison like this
+            # but we have no error codes, only error messages.
+            # TODO(gibi): Use more granular error codes when available
+            for err in errors:
+                if err.get('code') == 'placement.concurrent_update':
+                    reason = (
+                        'another process changed the resource providers '
+                        'involved in our attempt to put allocations for '
+                        'consumer %s' % consumer_uuid)
+                    raise report.Retry('put_allocations', reason)
+            raise exception.AllocationUpdateFailed(
+                consumer_uuid=consumer_uuid, error=errors)
+
+    def _get_allocs_for_consumer(self, context, consumer, version=None):
+        resp = self.reportclient.get('/allocations/%s' % consumer,
+                                     version=version)
+        if not resp:
+            # TODO(efried): Use code/title/detail to make a better exception
+            raise exception.ConsumerAllocationRetrievalFailed(
+                consumer_uuid=consumer, error=resp.text)
+
+        return resp.json()
diff --git a/compute_hyperv/tests/unit/cluster/test_clusterops.py b/compute_hyperv/tests/unit/cluster/test_clusterops.py
index 9295d875..c272258f 100644
--- a/compute_hyperv/tests/unit/cluster/test_clusterops.py
+++ b/compute_hyperv/tests/unit/cluster/test_clusterops.py
@@ -18,6 +18,7 @@ import mock
 from nova.compute import power_state
 from nova.compute import task_states
 from nova.compute import vm_states
+from nova import exception
 from nova.network.neutronv2 import api as network_api
 from nova import objects
 from nova.virt import event as virtevent
@@ -41,6 +42,7 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
         network_api.API,
         clusterops.vmops.VMOps,
         clusterops.serialconsoleops.SerialConsoleOps,
+        clusterops.placement_utils.PlacementUtils,
     ]
 
     _FAKE_INSTANCE_NAME = 'fake_instance_name'
@@ -51,8 +53,10 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
 
         self.clusterops = clusterops.ClusterOps()
         self.clusterops._context = self.context
+
         self._clustutils = self.clusterops._clustutils
         self._network_api = self.clusterops._network_api
+        self._placement = self.clusterops._placement
 
     def test_get_instance_host(self):
         mock_instance = fake_instance.fake_instance_obj(self.context)
@@ -219,6 +223,9 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
         instance.host = old_host
         self.clusterops._this_node = new_host
         self._clustutils.get_vm_host.return_value = new_host
+        # Placement exceptions shouldn't break the rest of the failover logic.
+        self._placement.move_compute_node_allocations.side_effect = (
+            exception.NovaException)
 
         self.clusterops._failover_migrate(mock.sentinel.instance_name,
                                           new_host)
@@ -235,6 +242,9 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
         mock_nova_failover_server.assert_called_once_with(instance, new_host)
         mock_failover_migrate_networks.assert_called_once_with(
             instance, old_host)
+        self._placement.move_compute_node_allocations.assert_called_once_with(
+            self.clusterops._context, instance, old_host, new_host,
+            merge_existing=False)
         self.clusterops._vmops.plug_vifs.assert_called_once_with(
             instance, get_inst_nw_info.return_value)
         c_handler = self.clusterops._serial_console_ops.start_console_handler
@@ -268,6 +278,7 @@ class ClusterOpsTestCase(test_base.HyperVBaseTestCase):
         self.clusterops._vmops.unplug_vifs.assert_not_called()
         self.clusterops._vmops.plug_vifs.assert_called_once_with(
             instance, get_inst_nw_info.return_value)
+        self._placement.move_compute_node_allocations.assert_not_called()
         mock_failover_migrate_networks.assert_not_called()
         c_handler = self.clusterops._serial_console_ops.start_console_handler
         c_handler.assert_called_once_with(mock.sentinel.instance_name)
diff --git a/compute_hyperv/tests/unit/utils/__init__.py b/compute_hyperv/tests/unit/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/compute_hyperv/tests/unit/utils/test_placement.py b/compute_hyperv/tests/unit/utils/test_placement.py
new file mode 100644
index 00000000..cda63b7e
--- /dev/null
+++ b/compute_hyperv/tests/unit/utils/test_placement.py
@@ -0,0 +1,156 @@
+# Copyright 2018 Cloudbase Solutions Srl
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import ddt
+import mock
+from nova import context
+from nova import exception
+from nova import objects
+from nova.tests.unit import fake_requests
+from oslo_serialization import jsonutils
+
+from compute_hyperv.nova.utils import placement as placement
+from compute_hyperv.tests import fake_instance
+from compute_hyperv.tests.unit import test_base
+
+
+@ddt.ddt
+class PlacementUtilsTestCase(test_base.HyperVBaseTestCase):
+    _autospec_classes = [
+        placement.report.SchedulerReportClient
+    ]
+
+    _FAKE_PROVIDER = 'fdb5c6d0-e0e9-4411-b952-fb05d6133718'
+    _FAKE_RESOURCES = {'VCPU': 1, 'MEMORY_MB': 512, 'DISK_GB': 1}
+    _FAKE_ALLOCATIONS = {
+        _FAKE_PROVIDER: {'resources': _FAKE_RESOURCES}
+    }
+
+    def setUp(self):
+        super(PlacementUtilsTestCase, self).setUp()
+        self.context = context.get_admin_context()
+        self.instance = fake_instance.fake_instance_obj(self.context)
+
+        self.placement = placement.PlacementUtils()
+        self.client = self.placement.reportclient
+
+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename')
+    @mock.patch.object(placement.PlacementUtils, 'move_allocations')
+    def test_move_compute_node_allocations(self, mock_move_alloc,
+                                           mock_get_comp_node):
+        mock_get_comp_node.side_effect = [
+            mock.Mock(uuid=uuid) for uuid in [mock.sentinel.old_host_uuid,
+                                              mock.sentinel.new_host_uuid]]
+
+        self.placement.move_compute_node_allocations(
+            self.context, self.instance, mock.sentinel.old_host,
+            mock.sentinel.new_host,
+            merge_existing=mock.sentinel.merge_existing)
+
+        mock_move_alloc.assert_called_once_with(
+            self.context, self.instance.uuid,
+            mock.sentinel.old_host_uuid,
+            mock.sentinel.new_host_uuid,
+            merge_existing=mock.sentinel.merge_existing)
+        mock_get_comp_node.assert_has_calls(
+            mock.call(self.context, host, host) for host in
+            [mock.sentinel.old_host, mock.sentinel.new_host])
+
+    @ddt.data({},  # provider did not change
+              {'old_rp': 'fake_rp'})  # provider not included in allocations
+    @ddt.unpack
+    @mock.patch.object(placement.PlacementUtils, '_get_allocs_for_consumer')
+    @mock.patch.object(placement.PlacementUtils, '_put_allocs')
+    def test_move_allocations_noop(self, mock_put, mock_get_allocs,
+                                   old_rp=_FAKE_PROVIDER,
+                                   new_rp=_FAKE_PROVIDER):
+        mock_get_allocs.return_value = {'allocations': self._FAKE_ALLOCATIONS}
+
+        self.placement.move_allocations(
+            self.context, mock.sentinel.consumer, old_rp, new_rp)
+
+        mock_get_allocs.assert_called_once_with(
+            self.context, mock.sentinel.consumer,
+            version=placement.SYMMETRIC_GET_PUT_ALLOCATIONS)
+        mock_put.assert_not_called()
+
+    @ddt.data(True, False)
+    @mock.patch.object(placement.PlacementUtils, '_get_allocs_for_consumer')
+    @mock.patch.object(placement.PlacementUtils, '_put_allocs')
+    def test_merge_allocations(self, merge_existing,
+                               mock_put, mock_get_allocs):
+        old_rp = self._FAKE_PROVIDER
+        new_rp = 'new_rp'
+        allocs = self._FAKE_ALLOCATIONS.copy()
+        allocs[new_rp] = {'resources': self._FAKE_RESOURCES.copy()}
+
+        mock_get_allocs.return_value = {'allocations': allocs}
+
+        if merge_existing:
+            exp_resources = {'VCPU': 2, 'MEMORY_MB': 1024, 'DISK_GB': 2}
+        else:
+            exp_resources = self._FAKE_RESOURCES
+        exp_allocs = {new_rp: {'resources': exp_resources}}
+
+        self.placement.move_allocations(
+            self.context, mock.sentinel.consumer, old_rp, new_rp,
+            merge_existing=merge_existing)
+
+        mock_put.assert_called_once_with(
+            self.context, mock.sentinel.consumer,
+            {'allocations': exp_allocs},
+            version=placement.SYMMETRIC_GET_PUT_ALLOCATIONS)
+
+    @ddt.data({},  # no errors
+              {'status_code': 409,
+               'errors': [{'code': 'placement.concurrent_update'}],
+               'expected_exc': placement.report.Retry},
+              {'status_code': 500,
+               'expected_exc': exception.AllocationUpdateFailed})
+    @ddt.unpack
+    def test_put_allocs(self, status_code=204, expected_exc=None, errors=None):
+        response = fake_requests.FakeResponse(
+            status_code,
+            content=jsonutils.dumps({'errors': errors}))
+        self.client.put.return_value = response
+
+        args = (self.context, mock.sentinel.consumer, mock.sentinel.allocs,
+                mock.sentinel.version)
+        if expected_exc:
+            self.assertRaises(expected_exc, self.placement._put_allocs, *args)
+        else:
+            self.placement._put_allocs(*args)
+
+        self.client.put.assert_called_once_with(
+            '/allocations/%s' % mock.sentinel.consumer,
+            mock.sentinel.allocs,
+            version=mock.sentinel.version)
+
+    def test_get_allocs(self):
+        ret_val = self.placement._get_allocs_for_consumer(
+            self.context, mock.sentinel.consumer, mock.sentinel.version)
+        exp_val = self.client.get.return_value.json.return_value
+        self.assertEqual(exp_val, ret_val)
+
+        self.client.get.assert_called_once_with(
+            '/allocations/%s' % mock.sentinel.consumer,
+            version=mock.sentinel.version)
+
+    def test_get_allocs_missing(self):
+        self.client.get.return_value = fake_requests.FakeResponse(500)
+        self.assertRaises(
+            exception.ConsumerAllocationRetrievalFailed,
+            self.placement._get_allocs_for_consumer,
+            self.context, mock.sentinel.consumer, mock.sentinel.version)