From 9e04b42c5b4f51ed88514a96790c6393c6973de5 Mon Sep 17 00:00:00 2001 From: Hiroaki Kobayashi Date: Mon, 18 Dec 2017 16:51:51 +0900 Subject: [PATCH] Support instance reservation healing Partially Implements: blueprint resource-monitoring Change-Id: I1d1000e74244778ac8d977abbcd0beaf85c1df03 --- blazar/plugins/instances/instance_plugin.py | 89 +++++-- .../plugins/instances/test_instance_plugin.py | 240 ++++++++++++++---- 2 files changed, 263 insertions(+), 66 deletions(-) diff --git a/blazar/plugins/instances/instance_plugin.py b/blazar/plugins/instances/instance_plugin.py index 0e0373e8..010d6922 100644 --- a/blazar/plugins/instances/instance_plugin.py +++ b/blazar/plugins/instances/instance_plugin.py @@ -12,6 +12,8 @@ # License for the specific language governing permissions and limitations # under the License. +import datetime + from novaclient import exceptions as nova_exceptions from oslo_config import cfg from oslo_log import log as logging @@ -24,6 +26,7 @@ from blazar import exceptions from blazar.manager import exceptions as mgr_exceptions from blazar.plugins import base from blazar.plugins import oshosts +from blazar import status from blazar.utils.openstack import nova from blazar.utils import plugins as plugins_utils @@ -50,6 +53,8 @@ class VirtualInstancePlugin(base.BasePlugin, nova.NovaClientWrapper): project_domain_name=CONF.os_admin_user_domain_name) self.freepool_name = CONF.nova.aggregate_freepool_name + self.monitor = oshosts.host_plugin.PhysicalHostMonitorPlugin() + self.monitor.register_healing_handler(self.heal_reservations) def filter_hosts_by_reservation(self, hosts, start_date, end_date, excludes): @@ -189,20 +194,11 @@ class VirtualInstancePlugin(base.BasePlugin, nova.NovaClientWrapper): removed_host_ids.add(kept_host_ids.pop()) elif len(kept_host_ids) < values['amount']: less = values['amount'] - len(kept_host_ids) - if less > len(extra_host_ids): - raise mgr_exceptions.NotEnoughHostsAvailable() ordered_extra_host_ids = [h['id'] for h in new_hosts if h['id'] in extra_host_ids] - for i in range(less): + for i in range(min(less, len(extra_host_ids))): added_host_ids.add(ordered_extra_host_ids[i]) - reservation = db_api.reservation_get(reservation_id) - if reservation['status'] == 'active' and len(removed_host_ids) > 0: - err_msg = ("Instance reservation doesn't allow to reduce/replace " - "reserved instance slots when the reservation is in " - "active status.") - raise mgr_exceptions.CantUpdateParameter(err_msg) - return {'added': added_host_ids, 'removed': removed_host_ids} def _create_flavor(self, reservation_id, vcpus, memory, disk, group_id): @@ -317,9 +313,9 @@ class VirtualInstancePlugin(base.BasePlugin, nova.NovaClientWrapper): raise exceptions.BlazarException('affinity = True is not ' 'supported.') - try: - hosts = self.pickup_hosts(reservation_id, values) - except mgr_exceptions.NotEnoughHostsAvailable: + hosts = self.pickup_hosts(reservation_id, values) + + if len(hosts['added']) < values['amount']: raise mgr_exceptions.HostNotFound("The reservation can't be " "accommodate because of less " "capacity.") @@ -409,6 +405,13 @@ class VirtualInstancePlugin(base.BasePlugin, nova.NovaClientWrapper): changed_hosts = self.pickup_hosts(reservation_id, new_values) + if (reservation['status'] == 'active' + and len(changed_hosts['removed']) > 0): + err_msg = ("Instance reservation doesn't allow to reduce/replace " + "reserved instance slots when the reservation is in " + "active status.") + raise mgr_exceptions.CantUpdateParameter(err_msg) + db_api.instance_reservation_update( reservation['resource_id'], {key: new_values[key] for key in updatable}) @@ -466,7 +469,7 @@ class VirtualInstancePlugin(base.BasePlugin, nova.NovaClientWrapper): self.cleanup_resources(instance_reservation) - def heal_reservations(cls, failed_resources): + def heal_reservations(self, failed_resources): """Heal reservations which suffer from resource failures. :param: failed_resources: a list of failed hosts. @@ -474,8 +477,60 @@ class VirtualInstancePlugin(base.BasePlugin, nova.NovaClientWrapper): e.g. {'de27786d-bd96-46bb-8363-19c13b2c6657': {'missing_resources': True}} """ + reservation_flags = {} - # TODO(hiro-kobayashi): Implement this method - LOG.warn('heal_reservations() is not implemented yet.') + failed_allocs = [] + for host in failed_resources: + failed_allocs += db_api.host_allocation_get_all_by_values( + compute_host_id=host['id']) - return {} + for alloc in failed_allocs: + reservation = db_api.reservation_get(alloc['reservation_id']) + if reservation['resource_type'] != RESOURCE_TYPE: + continue + pool = None + + # Remove the failed host from the aggregate. + if reservation['status'] == status.reservation.ACTIVE: + host = db_api.host_get(alloc['compute_host_id']) + pool = nova.ReservationPool() + pool.remove_computehost(reservation['aggregate_id'], + host['service_name']) + + # Allocate alternative resource. + values = {} + lease = db_api.lease_get(reservation['lease_id']) + values['start_date'] = max(datetime.datetime.utcnow(), + lease['start_date']) + values['end_date'] = lease['end_date'] + specs = ['vcpus', 'memory_mb', 'disk_gb', 'affinity', 'amount'] + for key in specs: + values[key] = reservation[key] + changed_hosts = self.pickup_hosts(reservation['id'], values) + if len(changed_hosts['added']) == 0: + if reservation['id'] not in reservation_flags: + reservation_flags[reservation['id']] = {} + reservation_flags[reservation['id']].update( + {'missing_resources': True}) + db_api.host_allocation_destroy(alloc['id']) + LOG.warn('Could not find alternative host for reservation %s ' + '(lease: %s).', reservation['id'], lease['name']) + else: + new_host_id = changed_hosts['added'].pop() + db_api.host_allocation_update( + alloc['id'], {'compute_host_id': new_host_id}) + if reservation['status'] == status.reservation.ACTIVE: + # Add the alternative host into the aggregate. + new_host = db_api.host_get(new_host_id) + pool.add_computehost(reservation['aggregate_id'], + new_host['service_name'], + stay_in=True) + if reservation['id'] not in reservation_flags: + reservation_flags[reservation['id']] = {} + reservation_flags[reservation['id']].update( + {'resources_changed': True}) + + LOG.warn('Resource changed for reservation %s (lease: %s).', + reservation['id'], lease['name']) + + return reservation_flags diff --git a/blazar/tests/plugins/instances/test_instance_plugin.py b/blazar/tests/plugins/instances/test_instance_plugin.py index e15b4d1a..bdd77b09 100644 --- a/blazar/tests/plugins/instances/test_instance_plugin.py +++ b/blazar/tests/plugins/instances/test_instance_plugin.py @@ -347,8 +347,10 @@ class TestVirtualInstancePlugin(tests.TestCase): 'start_date': '2030-01-01 08:00', 'end_date': '2030-01-01 12:00' } - self.assertRaises(mgr_exceptions.NotEnoughHostsAvailable, - plugin.pickup_hosts, 'reservation-id1', values) + + hosts = plugin.pickup_hosts('reservation-id1', values) + self.assertTrue((len(hosts['added']) - len(hosts['removed'])) + < values['amount']) def test_max_usage_with_serial_reservation(self): def fake_event_get(sort_key, sort_dir, filters): @@ -593,53 +595,6 @@ class TestVirtualInstancePlugin(tests.TestCase): self.assertEqual(expect['added'], ret['added']) self.assertEqual(expect['removed'], ret['removed']) - def test_pickup_hosts_for_update_in_active(self): - reservation = {'id': 'reservation-id1', 'status': 'active'} - plugin = instance_plugin.VirtualInstancePlugin() - - mock_alloc_get = self.patch(db_api, - 'host_allocation_get_all_by_values') - mock_alloc_get.return_value = [ - {'compute_host_id': 'host-id1'}, {'compute_host_id': 'host-id2'}, - {'compute_host_id': 'host-id3'}] - mock_query_available = self.patch(plugin, 'query_available_hosts') - mock_query_available.return_value = [ - self.generate_host_info('host-id2', 2, 2024, 1000), - self.generate_host_info('host-id3', 2, 2024, 1000), - self.generate_host_info('host-id4', 2, 2024, 1000)] - - mock_reservation_get = self.patch(db_api, 'reservation_get') - mock_reservation_get.return_value = reservation - - # case: new amount is less than old amount - values = self.get_input_values(1, 1024, 10, 1, False, - '2020-07-01 10:00', '2020-07-01 11:00', - 'lease-1') - self.assertRaises(mgr_exceptions.CantUpdateParameter, - plugin.pickup_hosts, reservation['id'], values) - - # case: new amount is same but change allocations - values = self.get_input_values(1, 1024, 10, 3, False, - '2020-07-01 10:00', '2020-07-01 11:00', - 'lease-1') - self.assertRaises(mgr_exceptions.CantUpdateParameter, - plugin.pickup_hosts, reservation['id'], values) - - # case: new amount is greater than old amount - mock_query_available.return_value = [ - self.generate_host_info('host-id1', 2, 2024, 1000), - self.generate_host_info('host-id2', 2, 2024, 1000), - self.generate_host_info('host-id3', 2, 2024, 1000), - self.generate_host_info('host-id4', 2, 2024, 1000)] - - values = self.get_input_values(1, 1024, 10, 4, False, - '2020-07-01 10:00', '2020-07-01 11:00', - 'lease-1') - expect = {'added': set(['host-id4']), 'removed': set([])} - ret = plugin.pickup_hosts(reservation['id'], values) - self.assertEqual(expect['added'], ret['added']) - self.assertEqual(expect['removed'], ret['removed']) - def test_update_resources(self): reservation = { 'id': 'reservation-id1', @@ -877,3 +832,190 @@ class TestVirtualInstancePlugin(tests.TestCase): fake.delete.assert_called_once() mock_cleanup_resources.assert_called_once_with( fake_instance_reservation) + + def test_heal_reservations_before_start_and_resources_changed(self): + plugin = instance_plugin.VirtualInstancePlugin() + failed_hosts = [{'id': 1}] + new_host_ids = [2] + alloc_get = self.patch(db_api, + 'host_allocation_get_all_by_values') + alloc_get.return_value = [{'id': 'alloc-1', + 'compute_host_id': '1', + 'reservation_id': 'rsrv-1'}] + alloc_destroy = self.patch(db_api, 'host_allocation_destroy') + reservation_get = self.patch(db_api, 'reservation_get') + reservation_get.return_value = { + 'id': 'rsrv-1', + 'resource_type': instance_plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'pending', + 'vcpus': 2, + 'memory_mb': 1024, + 'disk_gb': 256, + 'aggregate_id': 'agg-1', + 'affinity': False, + 'amount': 3} + host_get = self.patch(db_api, 'host_get') + host_get.return_value = {'service_name': 'compute'} + mock_pool = self.patch(nova, 'ReservationPool') + mock_pool.return_value = mock.MagicMock() + lease_get = self.patch(db_api, 'lease_get') + lease_get.return_value = { + 'name': 'lease-name', + 'start_date': datetime.datetime(2020, 1, 1, 12, 00), + 'end_date': datetime.datetime(2020, 1, 2, 12, 00)} + pickup_hosts = self.patch(plugin, 'pickup_hosts') + pickup_hosts.return_value = {'added': new_host_ids, 'removed': []} + alloc_update = self.patch(db_api, 'host_allocation_update') + + with mock.patch.object(datetime, 'datetime', + mock.Mock(wraps=datetime.datetime)) as patched: + patched.utcnow.return_value = datetime.datetime(2020, 1, 1, + 11, 00) + result = plugin.heal_reservations(failed_hosts) + alloc_destroy.assert_not_called() + pickup_hosts.assert_called_once() + alloc_update.assert_called_once_with('alloc-1', + {'compute_host_id': 2}) + self.assertEqual({}, result) + + def test_heal_reservations_before_start_and_missing_resources(self): + plugin = instance_plugin.VirtualInstancePlugin() + failed_hosts = [{'id': 1}] + new_host_ids = [] + alloc_get = self.patch(db_api, + 'host_allocation_get_all_by_values') + alloc_get.return_value = [{'id': 'alloc-1', + 'compute_host_id': '1', + 'reservation_id': 'rsrv-1'}] + alloc_destroy = self.patch(db_api, 'host_allocation_destroy') + reservation_get = self.patch(db_api, 'reservation_get') + reservation_get.return_value = { + 'id': 'rsrv-1', + 'resource_type': instance_plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'pending', + 'vcpus': 2, + 'memory_mb': 1024, + 'disk_gb': 256, + 'aggregate_id': 'agg-1', + 'affinity': False, + 'amount': 3} + host_get = self.patch(db_api, 'host_get') + host_get.return_value = {'service_name': 'compute'} + mock_pool = self.patch(nova, 'ReservationPool') + mock_pool.return_value = mock.MagicMock() + lease_get = self.patch(db_api, 'lease_get') + lease_get.return_value = { + 'name': 'lease-name', + 'start_date': datetime.datetime(2020, 1, 1, 12, 00), + 'end_date': datetime.datetime(2020, 1, 2, 12, 00)} + pickup_hosts = self.patch(plugin, 'pickup_hosts') + pickup_hosts.return_value = {'added': new_host_ids, 'removed': []} + alloc_update = self.patch(db_api, 'host_allocation_update') + + with mock.patch.object(datetime, 'datetime', + mock.Mock(wraps=datetime.datetime)) as patched: + patched.utcnow.return_value = datetime.datetime(2020, 1, 1, + 11, 00) + result = plugin.heal_reservations(failed_hosts) + alloc_destroy.assert_called_once_with('alloc-1') + pickup_hosts.assert_called_once() + alloc_update.assert_not_called() + self.assertEqual({'rsrv-1': {'missing_resources': True}}, result) + + def test_heal_active_reservations_and_resources_changed(self): + plugin = instance_plugin.VirtualInstancePlugin() + failed_hosts = [{'id': 1}] + new_host_ids = [2] + alloc_get = self.patch(db_api, + 'host_allocation_get_all_by_values') + alloc_get.return_value = [{'id': 'alloc-1', + 'compute_host_id': '1', + 'reservation_id': 'rsrv-1'}] + alloc_destroy = self.patch(db_api, 'host_allocation_destroy') + reservation_get = self.patch(db_api, 'reservation_get') + reservation_get.return_value = { + 'id': 'rsrv-1', + 'resource_type': instance_plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'active', + 'vcpus': 2, + 'memory_mb': 1024, + 'disk_gb': 256, + 'aggregate_id': 'agg-1', + 'affinity': False, + 'amount': 3} + host_get = self.patch(db_api, 'host_get') + host_get.return_value = {'service_name': 'compute'} + fake_pool = mock.MagicMock() + mock_pool = self.patch(nova, 'ReservationPool') + mock_pool.return_value = fake_pool + lease_get = self.patch(db_api, 'lease_get') + lease_get.return_value = { + 'name': 'lease-name', + 'start_date': datetime.datetime(2020, 1, 1, 12, 00), + 'end_date': datetime.datetime(2020, 1, 2, 12, 00)} + pickup_hosts = self.patch(plugin, 'pickup_hosts') + pickup_hosts.return_value = {'added': new_host_ids, 'removed': []} + alloc_update = self.patch(db_api, 'host_allocation_update') + + with mock.patch.object(datetime, 'datetime', + mock.Mock(wraps=datetime.datetime)) as patched: + patched.utcnow.return_value = datetime.datetime(2020, 1, 1, + 13, 00) + result = plugin.heal_reservations(failed_hosts) + alloc_destroy.assert_not_called() + pickup_hosts.assert_called_once() + alloc_update.assert_called_once_with('alloc-1', + {'compute_host_id': 2}) + fake_pool.add_computehost.assert_called_once_with('agg-1', + 'compute', + stay_in=True) + self.assertEqual({'rsrv-1': {'resources_changed': True}}, result) + + def test_heal_active_reservations_and_missing_resources(self): + plugin = instance_plugin.VirtualInstancePlugin() + failed_hosts = [{'id': 1}] + new_host_ids = [] + alloc_get = self.patch(db_api, + 'host_allocation_get_all_by_values') + alloc_get.return_value = [{'id': 'alloc-1', + 'compute_host_id': '1', + 'reservation_id': 'rsrv-1'}] + alloc_destroy = self.patch(db_api, 'host_allocation_destroy') + reservation_get = self.patch(db_api, 'reservation_get') + reservation_get.return_value = { + 'id': 'rsrv-1', + 'resource_type': instance_plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'active', + 'vcpus': 2, + 'memory_mb': 1024, + 'disk_gb': 256, + 'aggregate_id': 'agg-1', + 'affinity': False, + 'amount': 3} + host_get = self.patch(db_api, 'host_get') + host_get.return_value = {'service_name': 'compute'} + fake_pool = mock.MagicMock() + mock_pool = self.patch(nova, 'ReservationPool') + mock_pool.return_value = fake_pool + lease_get = self.patch(db_api, 'lease_get') + lease_get.return_value = { + 'name': 'lease-name', + 'start_date': datetime.datetime(2020, 1, 1, 12, 00), + 'end_date': datetime.datetime(2020, 1, 2, 12, 00)} + pickup_hosts = self.patch(plugin, 'pickup_hosts') + pickup_hosts.return_value = {'added': new_host_ids, 'removed': []} + alloc_update = self.patch(db_api, 'host_allocation_update') + + with mock.patch.object(datetime, 'datetime', + mock.Mock(wraps=datetime.datetime)) as patched: + patched.utcnow.return_value = datetime.datetime(2020, 1, 1, + 13, 00) + result = plugin.heal_reservations(failed_hosts) + alloc_destroy.assert_called_once_with('alloc-1') + pickup_hosts.assert_called_once() + alloc_update.assert_not_called() + self.assertEqual({'rsrv-1': {'missing_resources': True}}, result)