diff --git a/blazar/db/api.py b/blazar/db/api.py index e2781f03..c712e64e 100644 --- a/blazar/db/api.py +++ b/blazar/db/api.py @@ -359,6 +359,12 @@ def reservable_host_get_all_by_queries(queries): return IMPL.reservable_host_get_all_by_queries(queries) +@to_dict +def unreservable_host_get_all_by_queries(queries): + """Returns unreservable hosts filtered by an array of queries.""" + return IMPL.unreservable_host_get_all_by_queries(queries) + + def host_destroy(host_id): """Delete specific Compute host.""" IMPL.host_destroy(host_id) diff --git a/blazar/db/sqlalchemy/api.py b/blazar/db/sqlalchemy/api.py index 528ee4d3..bb9fa781 100644 --- a/blazar/db/sqlalchemy/api.py +++ b/blazar/db/sqlalchemy/api.py @@ -688,6 +688,20 @@ def reservable_host_get_all_by_queries(queries): return host_get_all_by_queries(queries) +def unreservable_host_get_all_by_queries(queries): + """Returns unreservable hosts filtered by an array of queries. + + :param queries: array of queries "key op value" where op can be + http://docs.sqlalchemy.org/en/rel_0_7/core/expression_api.html + #sqlalchemy.sql.operators.ColumnOperators + + """ + + # TODO(hiro-kobayashi): support the expression 'reservable == False' + queries.append('reservable == 0') + return host_get_all_by_queries(queries) + + def host_create(values): values = values.copy() host = models.ComputeHost() diff --git a/blazar/db/sqlalchemy/models.py b/blazar/db/sqlalchemy/models.py index 123779d1..d7466f4c 100644 --- a/blazar/db/sqlalchemy/models.py +++ b/blazar/db/sqlalchemy/models.py @@ -99,7 +99,7 @@ class Reservation(mb.BlazarBase): backref='reservation', lazy='joined') computehost_allocations = relationship('ComputeHostAllocation', - uselist=False, + uselist=True, cascade="all,delete", backref='reservation', lazy='joined') diff --git a/blazar/db/sqlalchemy/utils.py b/blazar/db/sqlalchemy/utils.py index 4c447c33..f04f09e2 100644 --- a/blazar/db/sqlalchemy/utils.py +++ b/blazar/db/sqlalchemy/utils.py @@ -70,6 +70,20 @@ def get_reservations_by_host_id(host_id, start_date, end_date): return query.all() +def get_reservations_by_host_ids(host_ids, start_date, end_date): + session = get_session() + border0 = sa.and_(models.Lease.start_date < start_date, + models.Lease.end_date < start_date) + border1 = sa.and_(models.Lease.start_date > end_date, + models.Lease.end_date > end_date) + query = (session.query(models.Reservation).join(models.Lease) + .join(models.ComputeHostAllocation) + .filter(models.ComputeHostAllocation.compute_host_id + .in_(host_ids)) + .filter(~sa.or_(border0, border1))) + return query.all() + + def get_free_periods(resource_id, start_date, end_date, duration): """Returns a list of free periods.""" reserved_periods = get_reserved_periods(resource_id, diff --git a/blazar/db/utils.py b/blazar/db/utils.py index 51aa3749..c563589d 100644 --- a/blazar/db/utils.py +++ b/blazar/db/utils.py @@ -107,6 +107,10 @@ def get_reservations_by_host_id(host_id, start_date, end_date): return IMPL.get_reservations_by_host_id(host_id, start_date, end_date) +def get_reservations_by_host_ids(host_ids, start_date, end_date): + return IMPL.get_reservations_by_host_ids(host_ids, start_date, end_date) + + def get_free_periods(resource_id, start_date, end_date, duration): """Returns a list of free periods.""" return IMPL.get_free_periods(resource_id, start_date, end_date, duration) diff --git a/blazar/manager/exceptions.py b/blazar/manager/exceptions.py index 51575aeb..704af673 100644 --- a/blazar/manager/exceptions.py +++ b/blazar/manager/exceptions.py @@ -175,3 +175,8 @@ 
class InvalidRange(exceptions.BlazarException): class CantUpdateParameter(exceptions.BlazarException): code = 409 msg_fmt = _("%(param)s cannot be updated") + + +class InvalidPeriod(exceptions.BlazarException): + code = 400 + msg_fmt = _('The end_date must be later than the start_date.') diff --git a/blazar/monitor/base.py b/blazar/monitor/base.py index f2f50a3f..5be2b5b8 100644 --- a/blazar/monitor/base.py +++ b/blazar/monitor/base.py @@ -11,32 +11,48 @@ # See the License for the specific language governing permissions and # limitations under the License. -import abc - from oslo_log import log as logging -import six +from oslo_service import threadgroup from blazar.db import api as db_api LOG = logging.getLogger(__name__) -@six.add_metaclass(abc.ABCMeta) class BaseMonitor(object): """Base class for monitoring classes.""" - @abc.abstractmethod + def __init__(self, monitor_plugins): + self.monitor_plugins = monitor_plugins + self.tg = threadgroup.ThreadGroup() + self.healing_timers = [] + def start_monitoring(self): """Start monitoring.""" - pass + self.start_periodic_healing() - @abc.abstractmethod def stop_monitoring(self): """Stop monitoring.""" - pass + self.stop_periodic_healing() - def update_statuses(self, callback, *args, **kwargs): - """Update leases and reservations table after executing a callback.""" + def start_periodic_healing(self): + """Start periodic healing process.""" + for plugin in self.monitor_plugins: + healing_interval_mins = plugin.get_healing_interval() + if healing_interval_mins > 0: + self.healing_timers.append( + self.tg.add_timer(healing_interval_mins * 60, + self.call_monitor_plugin, + None, + plugin.heal)) + + def stop_periodic_healing(self): + """Stop periodic healing process.""" + for timer in self.healing_timers: + self.tg.timer_done(timer) + + def call_monitor_plugin(self, callback, *args, **kwargs): + """Call a callback and update lease/reservation flags.""" try: # The callback() has to return a dictionary of # {reservation id: flags to update}. @@ -46,17 +62,20 @@ class BaseMonitor(object): LOG.exception('Caught an exception while executing a callback. ' '%s', str(e)) - # TODO(hiro-kobayashi): update statuses of related leases and - # reservations. Depends on the state-machine blueprint. + if reservation_flags: + self._update_flags(reservation_flags) - # Update flags of related leases and reservations. 
+ def _update_flags(self, reservation_flags): + """Update lease/reservation flags.""" lease_ids = set([]) + for reservation_id, flags in reservation_flags.items(): db_api.reservation_update(reservation_id, flags) LOG.debug('Reservation %s was updated: %s', reservation_id, flags) reservation = db_api.reservation_get(reservation_id) lease_ids.add(reservation['lease_id']) + for lease_id in lease_ids: LOG.debug('Lease %s was updated: {"degraded": True}', lease_id) db_api.lease_update(lease_id, {'degraded': True}) diff --git a/blazar/monitor/notification_monitor.py b/blazar/monitor/notification_monitor.py index 12a4dbe8..07c8d0cf 100644 --- a/blazar/monitor/notification_monitor.py +++ b/blazar/monitor/notification_monitor.py @@ -28,6 +28,7 @@ class NotificationMonitor(base.BaseMonitor): def __init__(self, monitor_plugins): """Initialize a notification monitor.""" LOG.debug('Initializing a notification monitor...') + super(NotificationMonitor, self).__init__(monitor_plugins) try: self.handlers = defaultdict(list) self.listener = oslo_messaging.get_notification_listener( @@ -46,6 +47,7 @@ class NotificationMonitor(base.BaseMonitor): LOG.debug('Starting a notification monitor...') try: self.listener.start() + super(NotificationMonitor, self).start_monitoring() except Exception as e: LOG.exception('Failed to start a notification monitor. (%s)', str(e)) @@ -55,6 +57,7 @@ class NotificationMonitor(base.BaseMonitor): LOG.debug('Stopping a notification monitor...') try: self.listener.stop() + super(NotificationMonitor, self).stop_monitoring() except Exception as e: LOG.exception('Failed to stop a notification monitor. (%s)', str(e)) @@ -85,9 +88,9 @@ class NotificationMonitor(base.BaseMonitor): for plugin in monitor_plugins: for event_type in plugin.get_notification_event_types(): self.handlers[event_type].append( - # Wrap a notification callback with the update_statuses() - # to manage statuses of leases and reservations. - lambda e_type, payload: self.update_statuses( + # Wrap the notification callback with the + # call_monitor_plugin() to manage lease/reservation flags. + lambda e_type, payload: self.call_monitor_plugin( plugin.notification_callback, e_type, payload)) return [NotificationEndpoint(self)] diff --git a/blazar/monitor/polling_monitor.py b/blazar/monitor/polling_monitor.py index a5ee04dc..abee41a2 100644 --- a/blazar/monitor/polling_monitor.py +++ b/blazar/monitor/polling_monitor.py @@ -12,7 +12,6 @@ # limitations under the License. from oslo_log import log as logging -from oslo_service import threadgroup from blazar.monitor import base @@ -24,19 +23,23 @@ class PollingMonitor(base.BaseMonitor): def __init__(self, monitor_plugins): """Initialize a polling monitor.""" - self.monitor_plugins = monitor_plugins - self.tg = threadgroup.ThreadGroup() + LOG.debug('Initializing a polling monitor...') + super(PollingMonitor, self).__init__(monitor_plugins) + self.polling_timers = [] def start_monitoring(self): """Start polling.""" LOG.debug('Starting a polling monitor...') + try: for plugin in self.monitor_plugins: - # Set poll() timer. The poll() is wrapped with the - # update_statuses() to manage statuses of leases and - # reservations. - self.tg.add_timer(plugin.get_polling_interval(), - self.update_statuses, 0, plugin.poll) + # Set polling timer. Wrap the monitor plugin method with the + # call_monitor_plugin() to manage lease/reservation flags. 
+ self.polling_timers.append( + self.tg.add_timer(plugin.get_polling_interval(), + self.call_monitor_plugin, None, + plugin.poll)) + super(PollingMonitor, self).start_monitoring() except Exception as e: LOG.exception('Failed to start a polling monitor. (%s)', str(e)) @@ -45,6 +48,8 @@ class PollingMonitor(base.BaseMonitor): """Stop polling.""" LOG.debug('Stopping a polling monitor...') try: - self.tg.stop() + for timer in self.polling_timers: + self.tg.timer_done(timer) + super(PollingMonitor, self).stop_monitoring() except Exception as e: LOG.exception('Failed to stop a polling monitor. (%s)', str(e)) diff --git a/blazar/plugins/base.py b/blazar/plugins/base.py index 7df7c6b3..31a3b496 100644 --- a/blazar/plugins/base.py +++ b/blazar/plugins/base.py @@ -86,10 +86,13 @@ class BasePlugin(object): """Take actions before the end of a lease""" pass - def heal_reservations(self, failed_resources): + def heal_reservations(self, failed_resources, interval_begin, + interval_end): """Heal reservations which suffer from resource failures. :param: failed_resources: failed resources + :param: interval_begin: start date of the period to heal. + :param: interval_end: end date of the period to heal. :return: a dictionary of {reservation id: flags to update} e.g. {'de27786d-bd96-46bb-8363-19c13b2c6657': {'missing_resources': True}} @@ -148,3 +151,15 @@ class BaseMonitorPlugin(): {'missing_resources': True}} """ pass + + @abc.abstractmethod + def get_healing_interval(self): + """Get interval of reservation healing in minutes.""" + pass + + @abc.abstractmethod + def heal(self): + """Heal suffering reservations. + + :return: a dictionary of {reservation id: flags to update} + """ diff --git a/blazar/plugins/instances/instance_plugin.py b/blazar/plugins/instances/instance_plugin.py index 2e422d33..f0e5edc7 100644 --- a/blazar/plugins/instances/instance_plugin.py +++ b/blazar/plugins/instances/instance_plugin.py @@ -469,68 +469,86 @@ class VirtualInstancePlugin(base.BasePlugin, nova.NovaClientWrapper): self.cleanup_resources(instance_reservation) - def heal_reservations(self, failed_resources): + def heal_reservations(self, failed_resources, interval_begin, + interval_end): """Heal reservations which suffer from resource failures. - :param: failed_resources: a list of failed hosts. + :param: failed_resources: failed resources + :param: interval_begin: start date of the period to heal. + :param: interval_end: end date of the period to heal. :return: a dictionary of {reservation id: flags to update} e.g. {'de27786d-bd96-46bb-8363-19c13b2c6657': {'missing_resources': True}} """ reservation_flags = {} - failed_allocs = [] - for host in failed_resources: - failed_allocs += db_api.host_allocation_get_all_by_values( - compute_host_id=host['id']) + host_ids = [h['id'] for h in failed_resources] + reservations = db_utils.get_reservations_by_host_ids(host_ids, + interval_begin, + interval_end) - for alloc in failed_allocs: - reservation = db_api.reservation_get(alloc['reservation_id']) + for reservation in reservations: if reservation['resource_type'] != RESOURCE_TYPE: continue - pool = None - # Remove the failed host from the aggregate. - if reservation['status'] == status.reservation.ACTIVE: - host = db_api.host_get(alloc['compute_host_id']) - pool = nova.ReservationPool() - pool.remove_computehost(reservation['aggregate_id'], - host['service_name']) - - # Allocate alternative resource. 
- values = {} - lease = db_api.lease_get(reservation['lease_id']) - values['start_date'] = max(datetime.datetime.utcnow(), - lease['start_date']) - values['end_date'] = lease['end_date'] - specs = ['vcpus', 'memory_mb', 'disk_gb', 'affinity', 'amount'] - for key in specs: - values[key] = reservation[key] - changed_hosts = self.pickup_hosts(reservation['id'], values) - if len(changed_hosts['added']) == 0: - if reservation['id'] not in reservation_flags: - reservation_flags[reservation['id']] = {} - reservation_flags[reservation['id']].update( - {'missing_resources': True}) - db_api.host_allocation_destroy(alloc['id']) - LOG.warn('Could not find alternative host for reservation %s ' - '(lease: %s).', reservation['id'], lease['name']) - else: - new_host_id = changed_hosts['added'].pop() - db_api.host_allocation_update( - alloc['id'], {'compute_host_id': new_host_id}) - if reservation['status'] == status.reservation.ACTIVE: - # Add the alternative host into the aggregate. - new_host = db_api.host_get(new_host_id) - pool.add_computehost(reservation['aggregate_id'], - new_host['service_name'], - stay_in=True) + for allocation in [alloc for alloc + in reservation['computehost_allocations'] + if alloc['compute_host_id'] in host_ids]: + if self._reallocate(allocation): + if reservation['status'] == status.reservation.ACTIVE: + if reservation['id'] not in reservation_flags: + reservation_flags[reservation['id']] = {} + reservation_flags[reservation['id']].update( + {'resources_changed': True}) + else: if reservation['id'] not in reservation_flags: reservation_flags[reservation['id']] = {} reservation_flags[reservation['id']].update( - {'resources_changed': True}) - - LOG.warn('Resource changed for reservation %s (lease: %s).', - reservation['id'], lease['name']) + {'missing_resources': True}) return reservation_flags + + def _reallocate(self, allocation): + """Allocate an alternative host. + + :param: allocation: allocation to change. + :return: True if an alternative host was successfully allocated. + """ + reservation = db_api.reservation_get(allocation['reservation_id']) + pool = nova.ReservationPool() + + # Remove the failed host from the aggregate. + if reservation['status'] == status.reservation.ACTIVE: + host = db_api.host_get(allocation['compute_host_id']) + pool.remove_computehost(reservation['aggregate_id'], + host['service_name']) + + # Allocate an alternative host. + values = {} + lease = db_api.lease_get(reservation['lease_id']) + values['start_date'] = max(datetime.datetime.utcnow(), + lease['start_date']) + values['end_date'] = lease['end_date'] + specs = ['vcpus', 'memory_mb', 'disk_gb', 'affinity', 'amount'] + for key in specs: + values[key] = reservation[key] + changed_hosts = self.pickup_hosts(reservation['id'], values) + if len(changed_hosts['added']) == 0: + db_api.host_allocation_destroy(allocation['id']) + LOG.warn('Could not find alternative host for reservation %s ' + '(lease: %s).', reservation['id'], lease['name']) + return False + else: + new_host_id = changed_hosts['added'].pop() + db_api.host_allocation_update( + allocation['id'], {'compute_host_id': new_host_id}) + if reservation['status'] == status.reservation.ACTIVE: + # Add the alternative host into the aggregate. 
+                new_host = db_api.host_get(new_host_id)
+                pool.add_computehost(reservation['aggregate_id'],
+                                     new_host['service_name'],
+                                     stay_in=True)
+            LOG.warn('Resource changed for reservation %s (lease: %s).',
+                     reservation['id'], lease['name'])
+
+            return True
diff --git a/blazar/plugins/oshosts/host_plugin.py b/blazar/plugins/oshosts/host_plugin.py
index d15164c7..8a41e1b8 100644
--- a/blazar/plugins/oshosts/host_plugin.py
+++ b/blazar/plugins/oshosts/host_plugin.py
@@ -66,6 +66,11 @@ plugin_opts = [
     cfg.IntOpt('polling_interval',
                default=60,
                help='Interval (seconds) of polling for health checking.'),
+    cfg.IntOpt('healing_interval',
+               default=60,
+               help='Interval (minutes) of reservation healing. '
+                    'If 0 is specified, the interval is infinite and all the '
+                    'reservations in the future are healed at one time.'),
 ]
 
 CONF = cfg.CONF
@@ -210,73 +215,89 @@ class PhysicalHostPlugin(base.BasePlugin, nova.NovaClientWrapper):
         except manager_ex.AggregateNotFound:
             pass
 
-    def heal_reservations(self, failed_resources):
+    def heal_reservations(self, failed_resources, interval_begin,
+                          interval_end):
         """Heal reservations which suffer from resource failures.
 
         :param: failed_resources: a list of failed hosts.
+        :param: interval_begin: start date of the period to heal.
+        :param: interval_end: end date of the period to heal.
         :return: a dictionary of {reservation id: flags to update}
                  e.g. {'de27786d-bd96-46bb-8363-19c13b2c6657':
                        {'missing_resources': True}}
         """
         reservation_flags = {}
 
-        failed_allocs = []
-        for host in failed_resources:
-            failed_allocs += db_api.host_allocation_get_all_by_values(
-                compute_host_id=host['id'])
+        host_ids = [h['id'] for h in failed_resources]
+        reservations = db_utils.get_reservations_by_host_ids(host_ids,
+                                                             interval_begin,
+                                                             interval_end)
 
-        for alloc in failed_allocs:
-            reservation = db_api.reservation_get(alloc['reservation_id'])
+        for reservation in reservations:
             if reservation['resource_type'] != plugin.RESOURCE_TYPE:
                 continue
-            lease = db_api.lease_get(reservation['lease_id'])
-            host_reservation = None
-            pool = None
-
-            # Remove the failed host from the aggregate.
-            if reservation['status'] == status.reservation.ACTIVE:
-                host = db_api.host_get(alloc['compute_host_id'])
-                host_reservation = db_api.host_reservation_get(
-                    reservation['resource_id'])
-                with trusts.create_ctx_from_trust(lease['trust_id']):
-                    pool = nova.ReservationPool()
-                    pool.remove_computehost(host_reservation['aggregate_id'],
-                                            host['service_name'])
-
-            # Allocate alternative resource.
-            start_date = max(datetime.datetime.utcnow(), lease['start_date'])
-            new_hostids = self._matching_hosts(
-                reservation['hypervisor_properties'],
-                reservation['resource_properties'],
-                '1-1', start_date, lease['end_date']
-            )
-            if not new_hostids:
-                if reservation['id'] not in reservation_flags:
-                    reservation_flags[reservation['id']] = {}
-                reservation_flags[reservation['id']].update(
-                    {'missing_resources': True})
-                db_api.host_allocation_destroy(alloc['id'])
-                LOG.warn('Could not find alternative host for reservation %s '
-                         '(lease: %s).', reservation['id'], lease['name'])
-            else:
-                new_hostid = new_hostids.pop()
-                db_api.host_allocation_update(alloc['id'],
-                                              {'compute_host_id': new_hostid})
-                if reservation['status'] == status.reservation.ACTIVE:
-                    # Add the alternative host into the aggregate.
-                    new_host = db_api.host_get(new_hostid)
-                    with trusts.create_ctx_from_trust(lease['trust_id']):
-                        pool.add_computehost(host_reservation['aggregate_id'],
-                                             new_host['service_name'])
- new_host = db_api.host_get(new_hostid) - with trusts.create_ctx_from_trust(lease['trust_id']): - pool.add_computehost(host_reservation['aggregate_id'], - new_host['service_name']) + for allocation in [alloc for alloc + in reservation['computehost_allocations'] + if alloc['compute_host_id'] in host_ids]: + if self._reallocate(allocation): + if reservation['status'] == status.reservation.ACTIVE: + if reservation['id'] not in reservation_flags: + reservation_flags[reservation['id']] = {} + reservation_flags[reservation['id']].update( + {'resources_changed': True}) + else: if reservation['id'] not in reservation_flags: reservation_flags[reservation['id']] = {} reservation_flags[reservation['id']].update( - {'resources_changed': True}) - LOG.warn('Resource changed for reservation %s (lease: %s).', - reservation['id'], lease['name']) + {'missing_resources': True}) return reservation_flags + def _reallocate(self, allocation): + """Allocate an alternative host. + + :param: allocation: allocation to change. + :return: True if an alternative host was successfully allocated. + """ + reservation = db_api.reservation_get(allocation['reservation_id']) + h_reservation = db_api.host_reservation_get( + reservation['resource_id']) + lease = db_api.lease_get(reservation['lease_id']) + pool = nova.ReservationPool() + + # Remove the old host from the aggregate. + if reservation['status'] == status.reservation.ACTIVE: + host = db_api.host_get(allocation['compute_host_id']) + pool.remove_computehost(h_reservation['aggregate_id'], + host['service_name']) + + # Allocate an alternative host. + start_date = max(datetime.datetime.utcnow(), lease['start_date']) + new_hostids = self._matching_hosts( + reservation['hypervisor_properties'], + reservation['resource_properties'], + '1-1', start_date, lease['end_date'] + ) + if not new_hostids: + db_api.host_allocation_destroy(allocation['id']) + LOG.warn('Could not find alternative host for reservation %s ' + '(lease: %s).', reservation['id'], lease['name']) + return False + else: + new_hostid = new_hostids.pop() + db_api.host_allocation_update(allocation['id'], + {'compute_host_id': new_hostid}) + LOG.warn('Resource changed for reservation %s (lease: %s).', + reservation['id'], lease['name']) + if reservation['status'] == status.reservation.ACTIVE: + # Add the alternative host into the aggregate. + new_host = db_api.host_get(new_hostid) + pool.add_computehost(h_reservation['aggregate_id'], + new_host['service_name']) + + return True + def _get_extra_capabilities(self, host_id): extra_capabilities = {} raw_extra_capabilities = ( @@ -754,8 +775,31 @@ class PhysicalHostMonitorPlugin(base.BaseMonitorPlugin, host['hypervisor_hostname'], str(e)) # Heal related reservations + return self.heal() + + def get_healing_interval(self): + """Get interval of reservation healing in minutes.""" + return CONF[plugin.RESOURCE_TYPE].healing_interval + + def heal(self): + """Heal suffering reservations in the next healing interval. 
+ + :return: a dictionary of {reservation id: flags to update} + """ reservation_flags = {} + hosts = db_api.unreservable_host_get_all_by_queries([]) + + interval_begin = datetime.datetime.utcnow() + interval = self.get_healing_interval() + if interval == 0: + interval_end = datetime.date.max + else: + interval_end = interval_begin + datetime.timedelta( + minutes=interval) + for handler in self.healing_handlers: - reservation_flags.update(handler(failed_hosts)) + reservation_flags.update(handler(hosts, + interval_begin, + interval_end)) return reservation_flags diff --git a/blazar/tests/db/sqlalchemy/test_utils.py b/blazar/tests/db/sqlalchemy/test_utils.py index c947cbad..5e34c4f1 100644 --- a/blazar/tests/db/sqlalchemy/test_utils.py +++ b/blazar/tests/db/sqlalchemy/test_utils.py @@ -110,9 +110,12 @@ class SQLAlchemyDBUtilsTestCase(tests.DBTestCase): _create_physical_lease(values=r2) _create_physical_lease(values=r3) - def check_reservation(self, expect, host_id, start, end): + def check_reservation(self, expect, host_ids, start, end): expect.sort(key=lambda x: x['lease_id']) - ret = db_utils.get_reservations_by_host_id(host_id, start, end) + if isinstance(host_ids, list): + ret = db_utils.get_reservations_by_host_ids(host_ids, start, end) + else: + ret = db_utils.get_reservations_by_host_id(host_ids, start, end) for i, res in enumerate(sorted(ret, key=lambda x: x['lease_id'])): self.assertEqual(expect[i]['lease_id'], res['lease_id']) @@ -377,5 +380,24 @@ class SQLAlchemyDBUtilsTestCase(tests.DBTestCase): self.check_reservation(expected, 'r1', '2030-01-01 08:00', '2030-01-01 17:00') + def test_get_reservations_by_host_ids(self): + self._setup_leases() + + self.check_reservation([], ['r1', 'r2'], + '2030-01-01 07:00', '2030-01-01 08:59') + + ret = db_api.reservation_get_all_by_lease_id('lease1') + self.check_reservation(ret, ['r1', 'r2'], + '2030-01-01 08:00', '2030-01-01 10:00') + + ret = db_api.reservation_get_all_by_lease_id('lease1') + ret.extend(db_api.reservation_get_all_by_lease_id('lease2')) + ret.extend(db_api.reservation_get_all_by_lease_id('lease3')) + self.check_reservation(ret, ['r1', 'r2'], + '2030-01-01 08:00', '2030-01-01 15:30') + + self.check_reservation([], ['r4'], + '2030-01-01 07:00', '2030-01-01 15:00') + # TODO(frossigneux) longest_availability # TODO(frossigneux) shortest_availability diff --git a/blazar/tests/monitor/test_base.py b/blazar/tests/monitor/test_base.py new file mode 100644 index 00000000..5532b093 --- /dev/null +++ b/blazar/tests/monitor/test_base.py @@ -0,0 +1,102 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import mock +from oslo_service import threadgroup + +from blazar.db import api as db_api +from blazar.monitor import base as base_monitor +from blazar.plugins import base +from blazar import tests + + +HEALING_INTERVAL = 10 + + +class DummyMonitorPlugin(base.BaseMonitorPlugin): + def is_notification_enabled(self): + return True + + def get_notification_event_types(self): + return [] + + def get_notification_topics(self): + return [] + + def notification_callback(self, event_type, message): + return {} + + def is_polling_enabled(self): + return False + + def get_polling_interval(self): + return 0 + + def poll(self): + return {} + + def get_healing_interval(self): + return HEALING_INTERVAL + + def heal(self): + return {} + + +class BaseMonitorTestCase(tests.TestCase): + def setUp(self): + super(BaseMonitorTestCase, self).setUp() + self.monitor_plugins = [DummyMonitorPlugin()] + self.monitor = base_monitor.BaseMonitor(self.monitor_plugins) + + def test_start_periodic_healing(self): + add_timer = self.patch(threadgroup.ThreadGroup, 'add_timer') + + self.monitor.start_periodic_healing() + add_timer.assert_called_once_with( + HEALING_INTERVAL * 60, self.monitor.call_monitor_plugin, None, + self.monitor_plugins[0].heal) + + def test_stop_periodic_healing(self): + dummy_timer = mock.Mock() + timer_done = self.patch(threadgroup.ThreadGroup, 'timer_done') + self.monitor.healing_timers.append(dummy_timer) + + self.monitor.stop_monitoring() + timer_done.assert_called_once_with(dummy_timer) + + def test_call_monitor_plugin(self): + callback = self.patch(DummyMonitorPlugin, + 'notification_callback') + callback.return_value = { + 'dummy_id1': {'missing_resources': True} + } + update_flags = self.patch(self.monitor, '_update_flags') + + self.monitor.call_monitor_plugin(callback, 'event_type1', 'hello') + callback.assert_called_once_with('event_type1', 'hello') + update_flags.assert_called_once_with( + {'dummy_id1': {'missing_resources': True}}) + + def test_call_update_flags(self): + reservation_update = self.patch(db_api, 'reservation_update') + reservation_get = self.patch(db_api, 'reservation_get') + reservation_get.return_value = { + 'lease_id': 'dummy_id2' + } + lease_update = self.patch(db_api, 'lease_update') + + self.monitor._update_flags({'dummy_id1': {'missing_resources': True}}) + reservation_update.assert_called_once_with( + 'dummy_id1', {'missing_resources': True}) + reservation_get.assert_called_once_with('dummy_id1') + lease_update.assert_called_once_with('dummy_id2', + {'degraded': True}) diff --git a/blazar/tests/monitor/test_notification_monitor.py b/blazar/tests/monitor/test_notification_monitor.py index 052c09ea..e98bbc9c 100644 --- a/blazar/tests/monitor/test_notification_monitor.py +++ b/blazar/tests/monitor/test_notification_monitor.py @@ -9,10 +9,8 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
- import oslo_messaging -from blazar.db import api as db_api from blazar.monitor import notification_monitor from blazar.plugins import base from blazar import tests @@ -40,6 +38,12 @@ class DummyMonitorPlugin(base.BaseMonitorPlugin): def poll(self): return {} + def get_healing_interval(self): + return 0 + + def heal(self): + return {} + class NotificationMonitorTestCase(tests.TestCase): def setUp(self): @@ -68,27 +72,6 @@ class NotificationMonitorTestCase(tests.TestCase): self.monitor._get_endpoints(self.plugins) endpoint.assert_called_once() - def test_update_statuses(self): - callback = self.patch(DummyMonitorPlugin, - 'notification_callback') - callback.return_value = { - 'dummy_id1': {'missing_resources': True} - } - reservation_update = self.patch(db_api, 'reservation_update') - reservation_get = self.patch(db_api, 'reservation_get') - reservation_get.return_value = { - 'lease_id': 'dummy_id2' - } - lease_update = self.patch(db_api, 'lease_update') - - self.monitor.update_statuses(callback, 'event_type1', 'hello') - callback.assert_called_once_with('event_type1', 'hello') - reservation_update.assert_called_once_with( - 'dummy_id1', {'missing_resources': True}) - reservation_get.assert_called_once_with('dummy_id1') - lease_update.assert_called_once_with('dummy_id2', - {'degraded': True}) - class NotificationEndpointTestCase(tests.TestCase): def setUp(self): diff --git a/blazar/tests/monitor/test_polling_monitor.py b/blazar/tests/monitor/test_polling_monitor.py index a55f8da3..30032a71 100644 --- a/blazar/tests/monitor/test_polling_monitor.py +++ b/blazar/tests/monitor/test_polling_monitor.py @@ -10,13 +10,19 @@ # License for the specific language governing permissions and limitations # under the License. +import mock from oslo_service import threadgroup +from blazar.monitor import base as base_monitor from blazar.monitor import polling_monitor from blazar.plugins import base from blazar import tests +POLLING_INTERVAL = 10 +HEALING_INTERVAL = 10 + + class DummyMonitorPlugin(base.BaseMonitorPlugin): def is_notification_enabled(self): return True @@ -34,11 +40,17 @@ class DummyMonitorPlugin(base.BaseMonitorPlugin): return False def get_polling_interval(self): - return 0 + return POLLING_INTERVAL def poll(self): return {} + def get_healing_interval(self): + return HEALING_INTERVAL + + def heal(self): + return {} + class PollingHandlerTestCase(tests.TestCase): def setUp(self): @@ -48,14 +60,18 @@ class PollingHandlerTestCase(tests.TestCase): def test_start_monitoring(self): add_timer = self.patch(threadgroup.ThreadGroup, 'add_timer') + self.patch(base_monitor.BaseMonitor, 'start_monitoring') self.monitor.start_monitoring() add_timer.assert_called_once_with( - self.monitor_plugins[0].get_polling_interval(), - self.monitor.update_statuses, 0, self.monitor_plugins[0].poll) + POLLING_INTERVAL, self.monitor.call_monitor_plugin, None, + self.monitor_plugins[0].poll) def test_stop_monitoring(self): - stop = self.patch(threadgroup.ThreadGroup, 'stop') + dummy_timer = mock.Mock() + timer_done = self.patch(threadgroup.ThreadGroup, 'timer_done') + self.monitor.polling_timers.append(dummy_timer) + self.patch(base_monitor.BaseMonitor, 'stop_monitoring') self.monitor.stop_monitoring() - stop.assert_called_once() + timer_done.assert_called_once_with(dummy_timer) diff --git a/blazar/tests/plugins/instances/test_instance_plugin.py b/blazar/tests/plugins/instances/test_instance_plugin.py index bdd77b09..e23dd438 100644 --- a/blazar/tests/plugins/instances/test_instance_plugin.py +++ 
b/blazar/tests/plugins/instances/test_instance_plugin.py @@ -835,16 +835,8 @@ class TestVirtualInstancePlugin(tests.TestCase): def test_heal_reservations_before_start_and_resources_changed(self): plugin = instance_plugin.VirtualInstancePlugin() - failed_hosts = [{'id': 1}] - new_host_ids = [2] - alloc_get = self.patch(db_api, - 'host_allocation_get_all_by_values') - alloc_get.return_value = [{'id': 'alloc-1', - 'compute_host_id': '1', - 'reservation_id': 'rsrv-1'}] - alloc_destroy = self.patch(db_api, 'host_allocation_destroy') - reservation_get = self.patch(db_api, 'reservation_get') - reservation_get.return_value = { + failed_host = {'id': '1'} + dummy_reservation = { 'id': 'rsrv-1', 'resource_type': instance_plugin.RESOURCE_TYPE, 'lease_id': 'lease-1', @@ -854,43 +846,30 @@ class TestVirtualInstancePlugin(tests.TestCase): 'disk_gb': 256, 'aggregate_id': 'agg-1', 'affinity': False, - 'amount': 3} - host_get = self.patch(db_api, 'host_get') - host_get.return_value = {'service_name': 'compute'} - mock_pool = self.patch(nova, 'ReservationPool') - mock_pool.return_value = mock.MagicMock() - lease_get = self.patch(db_api, 'lease_get') - lease_get.return_value = { - 'name': 'lease-name', - 'start_date': datetime.datetime(2020, 1, 1, 12, 00), - 'end_date': datetime.datetime(2020, 1, 2, 12, 00)} - pickup_hosts = self.patch(plugin, 'pickup_hosts') - pickup_hosts.return_value = {'added': new_host_ids, 'removed': []} - alloc_update = self.patch(db_api, 'host_allocation_update') + 'amount': 3, + 'computehost_allocations': [{ + 'id': 'alloc-1', 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + }] + } + get_reservations = self.patch(db_utils, + 'get_reservations_by_host_ids') + get_reservations.return_value = [dummy_reservation] + reallocate = self.patch(plugin, '_reallocate') + reallocate.return_value = True - with mock.patch.object(datetime, 'datetime', - mock.Mock(wraps=datetime.datetime)) as patched: - patched.utcnow.return_value = datetime.datetime(2020, 1, 1, - 11, 00) - result = plugin.heal_reservations(failed_hosts) - alloc_destroy.assert_not_called() - pickup_hosts.assert_called_once() - alloc_update.assert_called_once_with('alloc-1', - {'compute_host_id': 2}) + result = plugin.heal_reservations( + [failed_host], + datetime.datetime(2020, 1, 1, 12, 00), + datetime.datetime(2020, 1, 1, 13, 00)) + reallocate.assert_called_once_with( + dummy_reservation['computehost_allocations'][0]) self.assertEqual({}, result) def test_heal_reservations_before_start_and_missing_resources(self): plugin = instance_plugin.VirtualInstancePlugin() - failed_hosts = [{'id': 1}] - new_host_ids = [] - alloc_get = self.patch(db_api, - 'host_allocation_get_all_by_values') - alloc_get.return_value = [{'id': 'alloc-1', - 'compute_host_id': '1', - 'reservation_id': 'rsrv-1'}] - alloc_destroy = self.patch(db_api, 'host_allocation_destroy') - reservation_get = self.patch(db_api, 'reservation_get') - reservation_get.return_value = { + failed_host = {'id': '1'} + dummy_reservation = { 'id': 'rsrv-1', 'resource_type': instance_plugin.RESOURCE_TYPE, 'lease_id': 'lease-1', @@ -900,42 +879,32 @@ class TestVirtualInstancePlugin(tests.TestCase): 'disk_gb': 256, 'aggregate_id': 'agg-1', 'affinity': False, - 'amount': 3} - host_get = self.patch(db_api, 'host_get') - host_get.return_value = {'service_name': 'compute'} - mock_pool = self.patch(nova, 'ReservationPool') - mock_pool.return_value = mock.MagicMock() - lease_get = self.patch(db_api, 'lease_get') - lease_get.return_value = { - 'name': 'lease-name', - 
'start_date': datetime.datetime(2020, 1, 1, 12, 00), - 'end_date': datetime.datetime(2020, 1, 2, 12, 00)} - pickup_hosts = self.patch(plugin, 'pickup_hosts') - pickup_hosts.return_value = {'added': new_host_ids, 'removed': []} - alloc_update = self.patch(db_api, 'host_allocation_update') + 'amount': 3, + 'computehost_allocations': [{ + 'id': 'alloc-1', 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + }] + } + get_reservations = self.patch(db_utils, + 'get_reservations_by_host_ids') + get_reservations.return_value = [dummy_reservation] + reallocate = self.patch(plugin, '_reallocate') + reallocate.return_value = False - with mock.patch.object(datetime, 'datetime', - mock.Mock(wraps=datetime.datetime)) as patched: - patched.utcnow.return_value = datetime.datetime(2020, 1, 1, - 11, 00) - result = plugin.heal_reservations(failed_hosts) - alloc_destroy.assert_called_once_with('alloc-1') - pickup_hosts.assert_called_once() - alloc_update.assert_not_called() - self.assertEqual({'rsrv-1': {'missing_resources': True}}, result) + result = plugin.heal_reservations( + [failed_host], + datetime.datetime(2020, 1, 1, 12, 00), + datetime.datetime(2020, 1, 1, 13, 00)) + reallocate.assert_called_once_with( + dummy_reservation['computehost_allocations'][0]) + self.assertEqual( + {dummy_reservation['id']: {'missing_resources': True}}, + result) def test_heal_active_reservations_and_resources_changed(self): plugin = instance_plugin.VirtualInstancePlugin() - failed_hosts = [{'id': 1}] - new_host_ids = [2] - alloc_get = self.patch(db_api, - 'host_allocation_get_all_by_values') - alloc_get.return_value = [{'id': 'alloc-1', - 'compute_host_id': '1', - 'reservation_id': 'rsrv-1'}] - alloc_destroy = self.patch(db_api, 'host_allocation_destroy') - reservation_get = self.patch(db_api, 'reservation_get') - reservation_get.return_value = { + failed_host = {'id': '1'} + dummy_reservation = { 'id': 'rsrv-1', 'resource_type': instance_plugin.RESOURCE_TYPE, 'lease_id': 'lease-1', @@ -945,47 +914,32 @@ class TestVirtualInstancePlugin(tests.TestCase): 'disk_gb': 256, 'aggregate_id': 'agg-1', 'affinity': False, - 'amount': 3} - host_get = self.patch(db_api, 'host_get') - host_get.return_value = {'service_name': 'compute'} - fake_pool = mock.MagicMock() - mock_pool = self.patch(nova, 'ReservationPool') - mock_pool.return_value = fake_pool - lease_get = self.patch(db_api, 'lease_get') - lease_get.return_value = { - 'name': 'lease-name', - 'start_date': datetime.datetime(2020, 1, 1, 12, 00), - 'end_date': datetime.datetime(2020, 1, 2, 12, 00)} - pickup_hosts = self.patch(plugin, 'pickup_hosts') - pickup_hosts.return_value = {'added': new_host_ids, 'removed': []} - alloc_update = self.patch(db_api, 'host_allocation_update') + 'amount': 3, + 'computehost_allocations': [{ + 'id': 'alloc-1', 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + }] + } + get_reservations = self.patch(db_utils, + 'get_reservations_by_host_ids') + get_reservations.return_value = [dummy_reservation] + reallocate = self.patch(plugin, '_reallocate') + reallocate.return_value = True - with mock.patch.object(datetime, 'datetime', - mock.Mock(wraps=datetime.datetime)) as patched: - patched.utcnow.return_value = datetime.datetime(2020, 1, 1, - 13, 00) - result = plugin.heal_reservations(failed_hosts) - alloc_destroy.assert_not_called() - pickup_hosts.assert_called_once() - alloc_update.assert_called_once_with('alloc-1', - {'compute_host_id': 2}) - fake_pool.add_computehost.assert_called_once_with('agg-1', - 'compute', - 
stay_in=True) - self.assertEqual({'rsrv-1': {'resources_changed': True}}, result) + result = plugin.heal_reservations( + [failed_host], + datetime.datetime(2020, 1, 1, 12, 00), + datetime.datetime(2020, 1, 1, 13, 00)) + reallocate.assert_called_once_with( + dummy_reservation['computehost_allocations'][0]) + self.assertEqual( + {dummy_reservation['id']: {'resources_changed': True}}, + result) def test_heal_active_reservations_and_missing_resources(self): plugin = instance_plugin.VirtualInstancePlugin() - failed_hosts = [{'id': 1}] - new_host_ids = [] - alloc_get = self.patch(db_api, - 'host_allocation_get_all_by_values') - alloc_get.return_value = [{'id': 'alloc-1', - 'compute_host_id': '1', - 'reservation_id': 'rsrv-1'}] - alloc_destroy = self.patch(db_api, 'host_allocation_destroy') - reservation_get = self.patch(db_api, 'reservation_get') - reservation_get.return_value = { + failed_host = {'id': '1'} + dummy_reservation = { 'id': 'rsrv-1', 'resource_type': instance_plugin.RESOURCE_TYPE, 'lease_id': 'lease-1', @@ -995,27 +949,177 @@ class TestVirtualInstancePlugin(tests.TestCase): 'disk_gb': 256, 'aggregate_id': 'agg-1', 'affinity': False, - 'amount': 3} - host_get = self.patch(db_api, 'host_get') - host_get.return_value = {'service_name': 'compute'} - fake_pool = mock.MagicMock() - mock_pool = self.patch(nova, 'ReservationPool') - mock_pool.return_value = fake_pool - lease_get = self.patch(db_api, 'lease_get') - lease_get.return_value = { + 'amount': 3, + 'computehost_allocations': [{ + 'id': 'alloc-1', 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + }] + } + get_reservations = self.patch(db_utils, + 'get_reservations_by_host_ids') + get_reservations.return_value = [dummy_reservation] + reallocate = self.patch(plugin, '_reallocate') + reallocate.return_value = False + + result = plugin.heal_reservations( + [failed_host], + datetime.datetime(2020, 1, 1, 12, 00), + datetime.datetime(2020, 1, 1, 13, 00)) + reallocate.assert_called_once_with( + dummy_reservation['computehost_allocations'][0]) + self.assertEqual( + {dummy_reservation['id']: {'missing_resources': True}}, + result) + + def test_reallocate_before_start(self): + plugin = instance_plugin.VirtualInstancePlugin() + failed_host = {'id': '1'} + new_host = {'id': '2'} + dummy_allocation = { + 'id': 'alloc-1', + 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + } + dummy_reservation = { + 'id': 'rsrv-1', + 'resource_type': instance_plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'pending', + 'vcpus': 2, + 'memory_mb': 1024, + 'disk_gb': 256, + 'aggregate_id': 'agg-1', + 'affinity': False, + 'amount': 3 + } + dummy_lease = { 'name': 'lease-name', 'start_date': datetime.datetime(2020, 1, 1, 12, 00), - 'end_date': datetime.datetime(2020, 1, 2, 12, 00)} + 'end_date': datetime.datetime(2020, 1, 2, 12, 00), + 'trust_id': 'trust-1' + } + reservation_get = self.patch(db_api, 'reservation_get') + reservation_get.return_value = dummy_reservation + lease_get = self.patch(db_api, 'lease_get') + lease_get.return_value = dummy_lease pickup_hosts = self.patch(plugin, 'pickup_hosts') - pickup_hosts.return_value = {'added': new_host_ids, 'removed': []} + pickup_hosts.return_value = {'added': [new_host['id']], 'removed': []} alloc_update = self.patch(db_api, 'host_allocation_update') with mock.patch.object(datetime, 'datetime', mock.Mock(wraps=datetime.datetime)) as patched: - patched.utcnow.return_value = datetime.datetime(2020, 1, 1, - 13, 00) - result = plugin.heal_reservations(failed_hosts) - 
alloc_destroy.assert_called_once_with('alloc-1') + patched.utcnow.return_value = datetime.datetime( + 2020, 1, 1, 11, 00) + result = plugin._reallocate(dummy_allocation) + pickup_hosts.assert_called_once() - alloc_update.assert_not_called() - self.assertEqual({'rsrv-1': {'missing_resources': True}}, result) + alloc_update.assert_called_once_with( + dummy_allocation['id'], + {'compute_host_id': new_host['id']}) + self.assertEqual(True, result) + + def test_reallocate_active(self): + plugin = instance_plugin.VirtualInstancePlugin() + failed_host = {'id': '1', + 'service_name': 'compute-1'} + new_host = {'id': '2', + 'service_name': 'compute-2'} + dummy_allocation = { + 'id': 'alloc-1', + 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + } + dummy_reservation = { + 'id': 'rsrv-1', + 'resource_type': instance_plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'active', + 'vcpus': 2, + 'memory_mb': 1024, + 'disk_gb': 256, + 'aggregate_id': 'agg-1', + 'affinity': False, + 'amount': 3 + } + dummy_lease = { + 'name': 'lease-name', + 'start_date': datetime.datetime(2020, 1, 1, 12, 00), + 'end_date': datetime.datetime(2020, 1, 2, 12, 00), + 'trust_id': 'trust-1' + } + reservation_get = self.patch(db_api, 'reservation_get') + reservation_get.return_value = dummy_reservation + lease_get = self.patch(db_api, 'lease_get') + lease_get.return_value = dummy_lease + host_get = self.patch(db_api, 'host_get') + host_get.return_value = failed_host + fake_pool = mock.MagicMock() + mock_pool = self.patch(nova, 'ReservationPool') + mock_pool.return_value = fake_pool + pickup_hosts = self.patch(plugin, 'pickup_hosts') + pickup_hosts.return_value = {'added': [new_host['id']], 'removed': []} + alloc_update = self.patch(db_api, 'host_allocation_update') + + with mock.patch.object(datetime, 'datetime', + mock.Mock(wraps=datetime.datetime)) as patched: + patched.utcnow.return_value = datetime.datetime( + 2020, 1, 1, 13, 00) + result = plugin._reallocate(dummy_allocation) + + fake_pool.remove_computehost.assert_called_once_with( + dummy_reservation['aggregate_id'], + failed_host['service_name']) + pickup_hosts.assert_called_once() + alloc_update.assert_called_once_with( + dummy_allocation['id'], + {'compute_host_id': new_host['id']}) + fake_pool.add_computehost.assert_called_once_with( + dummy_reservation['aggregate_id'], + failed_host['service_name'], + stay_in=True) + self.assertEqual(True, result) + + def test_reallocate_missing_resources(self): + plugin = instance_plugin.VirtualInstancePlugin() + failed_host = {'id': '1', + 'service_name': 'compute-1'} + dummy_allocation = { + 'id': 'alloc-1', + 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + } + dummy_reservation = { + 'id': 'rsrv-1', + 'resource_type': instance_plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'pending', + 'vcpus': 2, + 'memory_mb': 1024, + 'disk_gb': 256, + 'aggregate_id': 'agg-1', + 'affinity': False, + 'amount': 3 + } + dummy_lease = { + 'name': 'lease-name', + 'start_date': datetime.datetime(2020, 1, 1, 12, 00), + 'end_date': datetime.datetime(2020, 1, 2, 12, 00), + 'trust_id': 'trust-1' + } + reservation_get = self.patch(db_api, 'reservation_get') + reservation_get.return_value = dummy_reservation + lease_get = self.patch(db_api, 'lease_get') + lease_get.return_value = dummy_lease + pickup_hosts = self.patch(plugin, 'pickup_hosts') + pickup_hosts.return_value = {'added': [], 'removed': []} + alloc_destroy = self.patch(db_api, 'host_allocation_destroy') + + with 
mock.patch.object(datetime, 'datetime', + mock.Mock(wraps=datetime.datetime)) as patched: + patched.utcnow.return_value = datetime.datetime( + 2020, 1, 1, 11, 00) + result = plugin._reallocate(dummy_allocation) + + pickup_hosts.assert_called_once() + alloc_destroy.assert_called_once_with(dummy_allocation['id']) + self.assertEqual(False, result) diff --git a/blazar/tests/plugins/test_physical_host_plugin.py b/blazar/tests/plugins/test_physical_host_plugin.py index fcfa9cf2..2ed38d61 100644 --- a/blazar/tests/plugins/test_physical_host_plugin.py +++ b/blazar/tests/plugins/test_physical_host_plugin.py @@ -1380,185 +1380,290 @@ class PhysicalHostPluginTestCase(tests.TestCase): delete_pool.assert_called_with(1) def test_heal_reservations_before_start_and_resources_changed(self): - failed_hosts = [{'id': '1'}] - new_hostid = '2' - alloc_get = self.patch(self.db_api, - 'host_allocation_get_all_by_values') - alloc_get.return_value = [{'id': 'alloc-1', - 'compute_host_id': '1', - 'reservation_id': 'rsrv-1'}] - alloc_destroy = self.patch(self.db_api, 'host_allocation_destroy') - reservation_get = self.patch(self.db_api, 'reservation_get') - reservation_get.return_value = {'id': 'rsrv-1', - 'resource_type': plugin.RESOURCE_TYPE, - 'lease_id': 'lease-1', - 'status': 'pending', - 'hypervisor_properties': [], - 'resource_properties': [], - 'resource_id': 'resource-1'} - host_get = self.patch(self.db_api, 'host_get') - host_get.return_value = {'service_name': 'compute'} - host_reservation_get = self.patch(self.db_api, 'host_reservation_get') - host_reservation_get.return_value = {'aggregate_id': 1} - lease_get = self.patch(self.db_api, 'lease_get') - lease_get.return_value = { - 'name': 'lease-name', - 'start_date': datetime.datetime(2020, 1, 1, 12, 00), - 'end_date': datetime.datetime(2020, 1, 2, 12, 00), - 'trust_id': 'trust-1'} - matching_hosts = self.patch(host_plugin.PhysicalHostPlugin, - '_matching_hosts') - matching_hosts.return_value = [new_hostid] - alloc_update = self.patch(self.db_api, 'host_allocation_update') + failed_host = {'id': '1'} + dummy_reservation = { + 'id': 'rsrv-1', + 'resource_type': plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'pending', + 'hypervisor_properties': [], + 'resource_properties': [], + 'resource_id': 'resource-1', + 'computehost_allocations': [{ + 'id': 'alloc-1', 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + }] + } + get_reservations = self.patch(self.db_utils, + 'get_reservations_by_host_ids') + get_reservations.return_value = [dummy_reservation] + reallocate = self.patch(self.fake_phys_plugin, '_reallocate') + reallocate.return_value = True - with mock.patch.object(datetime, 'datetime', - mock.Mock(wraps=datetime.datetime)) as patched: - patched.utcnow.return_value = datetime.datetime(2020, 1, 1, - 11, 00) - result = self.fake_phys_plugin.heal_reservations(failed_hosts) - alloc_destroy.assert_not_called() - matching_hosts.assert_called_once_with( - [], [], '1-1', + result = self.fake_phys_plugin.heal_reservations( + [failed_host], datetime.datetime(2020, 1, 1, 12, 00), - datetime.datetime(2020, 1, 2, 12, 00)) - alloc_update.assert_called_once_with('alloc-1', - {'compute_host_id': new_hostid}) + datetime.datetime(2020, 1, 1, 13, 00)) + reallocate.assert_called_once_with( + dummy_reservation['computehost_allocations'][0]) self.assertEqual({}, result) def test_heal_reservations_before_start_and_missing_resources(self): - failed_hosts = [{'id': '1'}] - alloc_get = self.patch(self.db_api, - 'host_allocation_get_all_by_values') - 
alloc_get.return_value = [{'id': 'alloc-1', - 'compute_host_id': '1', - 'reservation_id': 'rsrv-1'}] - alloc_destroy = self.patch(self.db_api, 'host_allocation_destroy') - reservation_get = self.patch(self.db_api, 'reservation_get') - reservation_get.return_value = {'id': 'rsrv-1', - 'resource_type': plugin.RESOURCE_TYPE, - 'lease_id': 'lease-1', - 'status': 'pending', - 'hypervisor_properties': [], - 'resource_properties': [], - 'resource_id': 'resource-1'} - host_get = self.patch(self.db_api, 'host_get') - host_get.return_value = {'service_name': 'compute'} - host_reservation_get = self.patch(self.db_api, 'host_reservation_get') - host_reservation_get.return_value = {'aggregate_id': 1} - lease_get = self.patch(self.db_api, 'lease_get') - lease_get.return_value = { - 'name': 'lease-name', - 'start_date': datetime.datetime(2020, 1, 1, 12, 00), - 'end_date': datetime.datetime(2020, 1, 2, 12, 00), - 'trust_id': 'trust-1'} - matching_hosts = self.patch(host_plugin.PhysicalHostPlugin, - '_matching_hosts') - matching_hosts.return_value = [] - alloc_update = self.patch(self.db_api, 'host_allocation_update') + failed_host = {'id': '1'} + dummy_reservation = { + 'id': 'rsrv-1', + 'resource_type': plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'pending', + 'hypervisor_properties': [], + 'resource_properties': [], + 'resource_id': 'resource-1', + 'computehost_allocations': [{ + 'id': 'alloc-1', 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + }] + } + get_reservations = self.patch(self.db_utils, + 'get_reservations_by_host_ids') + get_reservations.return_value = [dummy_reservation] + reallocate = self.patch(self.fake_phys_plugin, '_reallocate') + reallocate.return_value = False - with mock.patch.object(datetime, 'datetime', - mock.Mock(wraps=datetime.datetime)) as patched: - patched.utcnow.return_value = datetime.datetime(2020, 1, 1, - 11, 00) - result = self.fake_phys_plugin.heal_reservations(failed_hosts) - alloc_destroy.assert_called_once_with('alloc-1') - matching_hosts.assert_called_once_with( - [], [], '1-1', + result = self.fake_phys_plugin.heal_reservations( + [failed_host], datetime.datetime(2020, 1, 1, 12, 00), - datetime.datetime(2020, 1, 2, 12, 00)) - alloc_update.assert_not_called() - self.assertEqual({'rsrv-1': {'missing_resources': True}}, result) + datetime.datetime(2020, 1, 1, 13, 00)) + reallocate.assert_called_once_with( + dummy_reservation['computehost_allocations'][0]) + self.assertEqual( + {dummy_reservation['id']: {'missing_resources': True}}, + result) def test_heal_active_reservations_and_resources_changed(self): - failed_hosts = [{'id': '1'}] - new_hostid = '2' - alloc_get = self.patch(self.db_api, - 'host_allocation_get_all_by_values') - alloc_get.return_value = [{'id': 'alloc-1', - 'compute_host_id': '1', - 'reservation_id': 'rsrv-1'}] - alloc_destroy = self.patch(self.db_api, 'host_allocation_destroy') - reservation_get = self.patch(self.db_api, 'reservation_get') - reservation_get.return_value = {'id': 'rsrv-1', - 'resource_type': plugin.RESOURCE_TYPE, - 'lease_id': 'lease-1', - 'status': 'active', - 'hypervisor_properties': [], - 'resource_properties': [], - 'resource_id': 'resource-1'} - host_get = self.patch(self.db_api, 'host_get') - host_get.return_value = {'service_name': 'compute'} - host_reservation_get = self.patch(self.db_api, 'host_reservation_get') - host_reservation_get.return_value = {'aggregate_id': 1} - lease_get = self.patch(self.db_api, 'lease_get') - lease_get.return_value = { + failed_host = {'id': '1'} + 
dummy_reservation = { + 'id': 'rsrv-1', + 'resource_type': plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'active', + 'hypervisor_properties': [], + 'resource_properties': [], + 'resource_id': 'resource-1', + 'computehost_allocations': [{ + 'id': 'alloc-1', 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + }] + } + get_reservations = self.patch(self.db_utils, + 'get_reservations_by_host_ids') + get_reservations.return_value = [dummy_reservation] + reallocate = self.patch(self.fake_phys_plugin, '_reallocate') + reallocate.return_value = True + + result = self.fake_phys_plugin.heal_reservations( + [failed_host], + datetime.datetime(2020, 1, 1, 12, 00), + datetime.datetime(2020, 1, 1, 13, 00)) + reallocate.assert_called_once_with( + dummy_reservation['computehost_allocations'][0]) + self.assertEqual( + {dummy_reservation['id']: {'resources_changed': True}}, + result) + + def test_heal_active_reservations_and_missing_resources(self): + failed_host = {'id': '1'} + dummy_reservation = { + 'id': 'rsrv-1', + 'resource_type': plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'active', + 'hypervisor_properties': [], + 'resource_properties': [], + 'resource_id': 'resource-1', + 'computehost_allocations': [{ + 'id': 'alloc-1', 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + }] + } + get_reservations = self.patch(self.db_utils, + 'get_reservations_by_host_ids') + get_reservations.return_value = [dummy_reservation] + reallocate = self.patch(self.fake_phys_plugin, '_reallocate') + reallocate.return_value = False + + result = self.fake_phys_plugin.heal_reservations( + [failed_host], + datetime.datetime(2020, 1, 1, 12, 00), + datetime.datetime(2020, 1, 1, 13, 00)) + reallocate.assert_called_once_with( + dummy_reservation['computehost_allocations'][0]) + self.assertEqual( + {dummy_reservation['id']: {'missing_resources': True}}, + result) + + def test_reallocate_before_start(self): + failed_host = {'id': '1'} + new_host = {'id': '2'} + dummy_allocation = { + 'id': 'alloc-1', + 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + } + dummy_reservation = { + 'id': 'rsrv-1', + 'resource_type': plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'pending', + 'hypervisor_properties': [], + 'resource_properties': [], + 'resource_id': 'resource-1' + } + dummy_host_reservation = { + 'aggregate_id': 1 + } + dummy_lease = { 'name': 'lease-name', 'start_date': datetime.datetime(2020, 1, 1, 12, 00), 'end_date': datetime.datetime(2020, 1, 2, 12, 00), - 'trust_id': 'trust-1'} + 'trust_id': 'trust-1' + } + reservation_get = self.patch(self.db_api, 'reservation_get') + reservation_get.return_value = dummy_reservation + host_reservation_get = self.patch(self.db_api, 'host_reservation_get') + host_reservation_get.return_value = dummy_host_reservation + lease_get = self.patch(self.db_api, 'lease_get') + lease_get.return_value = dummy_lease matching_hosts = self.patch(host_plugin.PhysicalHostPlugin, '_matching_hosts') - matching_hosts.return_value = [new_hostid] + matching_hosts.return_value = [new_host['id']] alloc_update = self.patch(self.db_api, 'host_allocation_update') with mock.patch.object(datetime, 'datetime', mock.Mock(wraps=datetime.datetime)) as patched: - patched.utcnow.return_value = datetime.datetime(2020, 1, 1, - 13, 00) - result = self.fake_phys_plugin.heal_reservations(failed_hosts) - alloc_destroy.assert_not_called() - matching_hosts.assert_called_once_with( - [], [], '1-1', - datetime.datetime(2020, 1, 1, 13, 00), - 
datetime.datetime(2020, 1, 2, 12, 00)) - alloc_update.assert_called_once_with('alloc-1', - {'compute_host_id': new_hostid}) - self.add_compute_host.assert_called_once_with(1, 'compute') - self.assertEqual({'rsrv-1': {'resources_changed': True}}, result) + patched.utcnow.return_value = datetime.datetime( + 2020, 1, 1, 11, 00) + result = self.fake_phys_plugin._reallocate(dummy_allocation) - def test_heal_active_reservations_and_missing_resources(self): - failed_hosts = [{'id': '1'}] - alloc_get = self.patch(self.db_api, - 'host_allocation_get_all_by_values') - alloc_get.return_value = [{'id': 'alloc-1', - 'compute_host_id': '1', - 'reservation_id': 'rsrv-1'}] - alloc_destroy = self.patch(self.db_api, 'host_allocation_destroy') - reservation_get = self.patch(self.db_api, 'reservation_get') - reservation_get.return_value = {'id': 'rsrv-1', - 'resource_type': plugin.RESOURCE_TYPE, - 'lease_id': 'lease-1', - 'status': 'pending', - 'hypervisor_properties': [], - 'resource_properties': [], - 'resource_id': 'resource-1'} - host_get = self.patch(self.db_api, 'host_get') - host_get.return_value = {'service_name': 'compute'} - host_reservation_get = self.patch(self.db_api, 'host_reservation_get') - host_reservation_get.return_value = {'aggregate_id': 1} - lease_get = self.patch(self.db_api, 'lease_get') - lease_get.return_value = { + matching_hosts.assert_called_once_with( + dummy_reservation['hypervisor_properties'], + dummy_reservation['resource_properties'], + '1-1', dummy_lease['start_date'], dummy_lease['end_date']) + alloc_update.assert_called_once_with( + dummy_allocation['id'], + {'compute_host_id': new_host['id']}) + self.assertEqual(True, result) + + def test_reallocate_active(self): + failed_host = {'id': '1', + 'service_name': 'compute-1'} + new_host = {'id': '2', + 'service_name': 'compute-2'} + dummy_allocation = { + 'id': 'alloc-1', + 'compute_host_id': failed_host['id'], + 'reservation_id': 'rsrv-1' + } + dummy_reservation = { + 'id': 'rsrv-1', + 'resource_type': plugin.RESOURCE_TYPE, + 'lease_id': 'lease-1', + 'status': 'active', + 'hypervisor_properties': [], + 'resource_properties': [], + 'resource_id': 'resource-1' + } + dummy_host_reservation = { + 'aggregate_id': 1 + } + dummy_lease = { 'name': 'lease-name', 'start_date': datetime.datetime(2020, 1, 1, 12, 00), 'end_date': datetime.datetime(2020, 1, 2, 12, 00), - 'trust_id': 'trust-1'} + 'trust_id': 'trust-1' + } + reservation_get = self.patch(self.db_api, 'reservation_get') + reservation_get.return_value = dummy_reservation + lease_get = self.patch(self.db_api, 'lease_get') + lease_get.return_value = dummy_lease + host_reservation_get = self.patch(self.db_api, 'host_reservation_get') + host_reservation_get.return_value = dummy_host_reservation + host_get = self.patch(self.db_api, 'host_get') + host_get.side_effect = [failed_host, new_host] + matching_hosts = self.patch(host_plugin.PhysicalHostPlugin, + '_matching_hosts') + matching_hosts.return_value = [new_host['id']] + alloc_update = self.patch(self.db_api, 'host_allocation_update') + + with mock.patch.object(datetime, 'datetime', + mock.Mock(wraps=datetime.datetime)) as patched: + patched.utcnow.return_value = datetime.datetime( + 2020, 1, 1, 13, 00) + result = self.fake_phys_plugin._reallocate(dummy_allocation) + + self.remove_compute_host.assert_called_once_with( + dummy_host_reservation['aggregate_id'], + failed_host['service_name']) + matching_hosts.assert_called_once_with( + dummy_reservation['hypervisor_properties'], + dummy_reservation['resource_properties'], + '1-1', 
+            dummy_lease['end_date'])
+        alloc_update.assert_called_once_with(
+            dummy_allocation['id'],
+            {'compute_host_id': new_host['id']})
+        self.add_compute_host.assert_called_once_with(
+            dummy_host_reservation['aggregate_id'],
+            new_host['service_name'])
+        self.assertEqual(True, result)
+
+    def test_reallocate_missing_resources(self):
+        failed_host = {'id': '1'}
+        dummy_allocation = {
+            'id': 'alloc-1',
+            'compute_host_id': failed_host['id'],
+            'reservation_id': 'rsrv-1'
+        }
+        dummy_reservation = {
+            'id': 'rsrv-1',
+            'resource_type': plugin.RESOURCE_TYPE,
+            'lease_id': 'lease-1',
+            'status': 'pending',
+            'hypervisor_properties': [],
+            'resource_properties': [],
+            'resource_id': 'resource-1'
+        }
+        dummy_host_reservation = {
+            'aggregate_id': 1
+        }
+        dummy_lease = {
+            'name': 'lease-name',
+            'start_date': datetime.datetime(2020, 1, 1, 12, 00),
+            'end_date': datetime.datetime(2020, 1, 2, 12, 00),
+            'trust_id': 'trust-1'
+        }
+        reservation_get = self.patch(self.db_api, 'reservation_get')
+        reservation_get.return_value = dummy_reservation
+        host_reservation_get = self.patch(self.db_api, 'host_reservation_get')
+        host_reservation_get.return_value = dummy_host_reservation
+        lease_get = self.patch(self.db_api, 'lease_get')
+        lease_get.return_value = dummy_lease
         matching_hosts = self.patch(host_plugin.PhysicalHostPlugin,
                                     '_matching_hosts')
         matching_hosts.return_value = []
-        alloc_update = self.patch(self.db_api, 'host_allocation_update')
+        alloc_destroy = self.patch(self.db_api, 'host_allocation_destroy')

         with mock.patch.object(datetime, 'datetime',
                                mock.Mock(wraps=datetime.datetime)) as patched:
-            patched.utcnow.return_value = datetime.datetime(2020, 1, 1,
-                                                            13, 00)
-            result = self.fake_phys_plugin.heal_reservations(failed_hosts)
-        alloc_destroy.assert_called_once_with('alloc-1')
+            patched.utcnow.return_value = datetime.datetime(
+                2020, 1, 1, 11, 00)
+            result = self.fake_phys_plugin._reallocate(dummy_allocation)
+
         matching_hosts.assert_called_once_with(
-            [], [], '1-1',
-            datetime.datetime(2020, 1, 1, 13, 00),
-            datetime.datetime(2020, 1, 2, 12, 00))
-        alloc_update.assert_not_called()
-        self.assertEqual({'rsrv-1': {'missing_resources': True}}, result)
+            dummy_reservation['hypervisor_properties'],
+            dummy_reservation['resource_properties'],
+            '1-1', dummy_lease['start_date'], dummy_lease['end_date'])
+        alloc_destroy.assert_called_once_with(dummy_allocation['id'])
+        self.assertEqual(False, result)

     def test_matching_hosts_not_allocated_hosts(self):
         def host_allocation_get_all_by_values(**kwargs):
@@ -1865,19 +1970,43 @@ class PhysicalHostMonitorPluginTestCase(tests.TestCase):
         self.assertEqual(([], hosts), result)

     def test_handle_failures(self):
-        hosts = [
+        failed_hosts = [
+            {'id': '1',
+             'hypervisor_hostname': 'compute-1'}
+        ]
+        host_update = self.patch(db_api, 'host_update')
+        heal = self.patch(self.host_monitor_plugin, 'heal')
+
+        self.host_monitor_plugin._handle_failures(failed_hosts)
+        host_update.assert_called_once_with(failed_hosts[0]['id'],
+                                            {'reservable': False})
+        heal.assert_called_once()
+
+    def test_heal(self):
+        failed_hosts = [
             {'id': '1',
              'hypervisor_hostname': 'compute-1'}
         ]
         reservation_flags = {
             'rsrv-1': {'missing_resources': True}
         }
-        host_update = self.patch(db_api, 'host_update')
-        heal_reservations = self.patch(host_plugin.PhysicalHostPlugin,
-                                       'heal_reservations')
-        heal_reservations.return_value = reservation_flags
-        self.host_monitor_plugin.healing_handlers = [heal_reservations]
+        hosts_get = self.patch(db_api, 'unreservable_host_get_all_by_queries')
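+        # heal() takes no arguments: it looks up failed hosts through the
+        # unreservable-host query mocked above, then heals reservations
+        # starting within one healing interval of utcnow.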
+        hosts_get.return_value = failed_hosts
+        get_healing_interval = self.patch(self.host_monitor_plugin,
+                                          'get_healing_interval')
+        get_healing_interval.return_value = 60
+        healing_handler = mock.Mock()
+        healing_handler.return_value = reservation_flags
+        self.host_monitor_plugin.healing_handlers = [healing_handler]
+        start_date = datetime.datetime(2020, 1, 1, 12, 00)

-        result = self.host_monitor_plugin._handle_failures(hosts)
-        host_update.assert_called_once_with('1', {'reservable': False})
+        with mock.patch.object(datetime, 'datetime',
+                               mock.Mock(wraps=datetime.datetime)) as patched:
+            patched.utcnow.return_value = start_date
+            result = self.host_monitor_plugin.heal()
+
+        healing_handler.assert_called_once_with(
+            failed_hosts, start_date,
+            start_date + datetime.timedelta(minutes=60)
+        )
         self.assertEqual(reservation_flags, result)
diff --git a/blazar/tests/utils/openstack/test_nova.py b/blazar/tests/utils/openstack/test_nova.py
index 4773d155..27225eb2 100644
--- a/blazar/tests/utils/openstack/test_nova.py
+++ b/blazar/tests/utils/openstack/test_nova.py
@@ -379,12 +379,10 @@ class ReservationPoolTestCase(tests.TestCase):

     def test_remove_computehosts_with_duplicate_host(self):
         self._patch_get_aggregate_from_name_or_id()
-        self.nova.aggregates.add_host.side_effect = (
-            nova_exceptions.Conflict(409))
-        self.assertRaises(manager_exceptions.CantAddHost,
-                          self.pool.remove_computehost,
-                          'pool',
-                          'host3')
+        add_host = self.nova.aggregates.add_host
+
+        self.pool.remove_computehost('pool', 'host3')
+        add_host.assert_not_called()

     def test_get_computehosts_with_correct_pool(self):
         self._patch_get_aggregate_from_name_or_id()
diff --git a/blazar/utils/openstack/nova.py b/blazar/utils/openstack/nova.py
index e3139898..0d7167f3 100644
--- a/blazar/utils/openstack/nova.py
+++ b/blazar/utils/openstack/nova.py
@@ -397,7 +397,7 @@ class ReservationPool(NovaClientWrapper):
                 self.nova.aggregates.remove_host(agg.id, host)
             except nova_exception.ClientException:
                 hosts_failing_to_remove.append(host)
-            if freepool_agg.id != agg.id:
+            if freepool_agg.id != agg.id and host not in freepool_agg.hosts:
                 # NOTE(sbauza) : We don't want to put again the host in
                 # freepool if the requested pool is the freepool...
                 try:
diff --git a/doc/source/images/healing_flow.png b/doc/source/images/healing_flow.png
new file mode 100644
index 00000000..f3ad0fb2
Binary files /dev/null and b/doc/source/images/healing_flow.png differ
diff --git a/doc/source/images/source/README.rst b/doc/source/images/source/README.rst
new file mode 100644
index 00000000..67333185
--- /dev/null
+++ b/doc/source/images/source/README.rst
@@ -0,0 +1,8 @@
+Image sources
+=============
+
+Images are drawn by `draw.io`_. To edit images, open `draw.io`_,
+select *Open Existing Diagram* and choose the *\*.xml* file under this
+directory.
+
+.. _draw.io: https://www.draw.io/
diff --git a/doc/source/images/statuses_source/event_statuses.xml b/doc/source/images/source/event_statuses.xml
similarity index 100%
rename from doc/source/images/statuses_source/event_statuses.xml
rename to doc/source/images/source/event_statuses.xml
diff --git a/doc/source/images/source/healing_flow.xml b/doc/source/images/source/healing_flow.xml
new file mode 100644
index 00000000..3b5e1ac7
--- /dev/null
+++ b/doc/source/images/source/healing_flow.xml
@@ -0,0 +1 @@
+7ZtLc+I4EIB/yx44LoVfgI9JJpk5zFalNlv7OG0pWBhNZIuSRYD99SthyQ/JBpPYmExMqlJ2W5ZEf91StyRGzl20+0rBevUbCSAe2ZNgN3K+jGx77tj8vxDslcBKBSFFQSoqCJ7Qf1AKJ1K6QQFMSgUZIZihdVm4IHEMF6wkA5SSbbnYkuByq2sQQkPwtADYlP6FArZKpbPJJJd/gyhcyZYd9eAZLF5CSjaxbG5kO8vDJ30cAVWVLJ+sQEC2BZFzP3LuKCEsvYp2dxALzSqtpe891DzNuk1hzJq84KQvvAK8kd/8DxRB2Te2V+pgcMeru12xCHOBxS8TRskLvCOYUC6JScxL3i4RxpoIYBTG/HbBOwS5/PYVUoa4om/kgwgFgWjmdrtCDD6twUK0ueVWxWUHTULR14monsRMmoply3vV3EgwEB8uN5Ug9SLahruCSCrlKyQRZHTPi6inCqi0X2sq77e5NbhStCoYguIKpP2FWc05BH4hOVQzmRpMfocJ2VCuGHtyY7DJdWTVaLEWW1FpRXa51TZXugE6xCBJJLkWiLgaEMsEMqsA4rQAZFYFBNJXwBCJf7WOEJm0SOR+Kv7eQ6QNv7BPY8h8p20Oc0PTMOCjtrwllK1ISGKA73Pp7WJDXzPnKIMpQIA7xP6WYnH9j7gezzxxG/NeZs/ETeHhD8jYXrIAG0a4KO/Gd0LWsv6zRyrp78XxgAEaQlYSia9/lBqFmBvpa3lSew8ChbvGF+zP5At2E1+omiTa8AXLOgrC+Uwg3D4HJcs2QDxQaIZQ3ej/4fDpWf/O9LT+p12p34xgP5v6vQbm73WlftdQ/8ieYpEsIH4RHrIGiFEcipfEd+YlVQleeVbInNrj4EYkckJZIpJEiwMYQJkpLqA7e6Y9OYdKrdrNlKpkjWde2eAj4dopxFmaS7kaqzQ+kC8V8zutHnt+vJ40qDDq4SoG+0KxtSiQVLekypDlMoGlWg7Gk2mymT2ZsfYxy5BJZgCSVRbkdWoMF6Lv+SVqb8avV9QQ/1vAzQdwprtd2m/fAE6Na58bnDtpCZxeUXfgXDPyUX0xlyQ+1gLeEqP1N/UOv/5T9rnWis5ZwHA10M54UvhYxixfFblaLYROrlfLz/TAgZ96OptfCz+/lp+ZfQ/81MTm6QNtX/w8M9RU/NyBX22uP7sWflPT/wxs52X9ai9uou1enETdyn5JC3A8fSFGBSMFHPaRIOpdi/NmOGLg+Nw5/amYvuMQs7ecfm4GOscs4yfNMPRVOtvxx37x8zZj0KvtLt+Ym/vSg4Of5ZgdM+3PwYdFu4OttbX2o1U0786lh0U7Lna6GZn1ajvEWBUJG5swYNiFea9L6wg7NoXeBvQs88gN6gtk4pylUGl2Fm0JEN5Q+IthNTyTWotLnuVismGnU681pIj3T+RB6qXHXKRlwRfcnV2TBInzFLbsZ5JWIgwmeyZ6NXZ9MTAiynWUirYwYccM/owVE6tsPNbM9IHKYyZtZHV+g90BtaDxHTxD/Jhr5ZkwRqKKFQ9GKvfb1UFbO83FDxYU7UJxvnj8DPgYM+a1vJiLIle4JW9pzOwKZlWJeBvHR33TeUfOjfhG3KNIwIdq0aacDTSUwzKXeuroAD3T6aqOFLWxsuXX7+x4A7I6ZK5aZ+h9ZdI/b8HjQhHUhY4u6IeRppMqCmenxPrK5rSzaLri/H0h+FkQbpH7IeLpMuJxddhVEU/V4NtGxGM3yImHiMecMLV9vez+AhFPxQ80ekQWk3+TffRM8Mfg5vfHbXZV3D6Mq9mzHpE12HQdkJm75j162bwiHxyQnUTm6csuF0TmX9XAmEQIw70ZlNYdl7hGmq7+89HuaPLb/Pfbaf6R/0Teuf8f
\ No newline at end of file
diff --git a/doc/source/images/statuses_source/lease_statuses.xml b/doc/source/images/source/lease_statuses.xml
similarity index 100%
rename from doc/source/images/statuses_source/lease_statuses.xml
rename to doc/source/images/source/lease_statuses.xml
diff --git a/doc/source/images/statuses_source/reservation_statuses.xml b/doc/source/images/source/reservation_statuses.xml
similarity index 100%
rename from doc/source/images/statuses_source/reservation_statuses.xml
rename to doc/source/images/source/reservation_statuses.xml
diff --git a/doc/source/images/statuses_source/README.rst b/doc/source/images/statuses_source/README.rst
deleted file mode 100644
index c71e1809..00000000
--- a/doc/source/images/statuses_source/README.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-Status graphs
-=============
-
-Status graphs are drawn by `draw.io`_ . To edit graphs, open `draw.io`_,
-select *Open Existing Diagram* and chose the *\*_statuses.xml* file under this
-directory.
-
-.. _draw.io: https://www.draw.io/
diff --git a/doc/source/user/compute-host-monitor.rst b/doc/source/user/compute-host-monitor.rst
index 621d2d72..09199f73 100644
--- a/doc/source/user/compute-host-monitor.rst
+++ b/doc/source/user/compute-host-monitor.rst
@@ -30,11 +30,13 @@ Reservation Healing
 If a host failure is detected, Blazar tries to heal host/instance
 reservations which use the failed host by reserving alternative host.
+The length of the *healing interval* can be configured with the
+*healing_interval* option.

 Configurations
 ==============

-To enable the compute host monitor, enable **enable_notification_monitor**
-or **enable_polling_monitor** option.
+To enable the compute host monitor, enable the *enable_notification_monitor*
+or *enable_polling_monitor* option, and set *healing_interval* as
+appropriate for your cloud.

 See also the :doc:`../configuration/blazar-conf` in detail.
-detail
diff --git a/doc/source/user/resource-monitoring.rst b/doc/source/user/resource-monitoring.rst
index 337ab531..5c2af39b 100644
--- a/doc/source/user/resource-monitoring.rst
+++ b/doc/source/user/resource-monitoring.rst
@@ -6,7 +6,7 @@ Blazar monitors states of resources and heals reservations which are expected
 to suffer from resource failure. Resource specific functionality, e.g.,
 calling Nova APIs, is provided as a monitoring plugin.

-The following sections describes the resource monitoring feature in detail.
+The following sections describe the resource monitoring feature in detail.

 Monitoring Type
 ===============
@@ -34,7 +34,48 @@ Healing
 =======

 When the monitor detects a resource failure, it heals reservations which
-are expected to suffer from the failure.
+are expected to suffer from the failure. Note that it does not immediately
+heal all reservations for the failed resource, because the resource is
+expected to recover at some point in the future: the monitor heals only
+reservations which are active or will start soon.
+
+In addition, the monitor periodically checks the validity of reservations
+and heals invalid ones. Even if the failed resource has not recovered by
+the last check, the periodic task heals invalid reservations which will
+start in the next interval.
+
+The healing flow is as follows:
+
+1. Resource A is reserved for *Reservation-1*, *Reservation-2* and
+   *Reservation-3* as shown in the following diagram.
+
+2. At point 1, the periodic task in the manager checks whether there is
+   any reservation to heal and finds none.
+
+3. At point 2, the manager detects a failure of resource A. It then heals
+   active reservations and reservations which will start within the
+   *healing interval*. In this case, *Reservation-1* and *Reservation-2*
+   are healed immediately.
+
+4. At point 3, the periodic task checks whether there is any reservation
+   to heal. It finds none: the resource has not yet recovered, but no
+   reservation will start in the next interval, and *Reservation-2* has
+   already been healed in step 3.
+
+5. At point 4, the periodic task checks again. This time it finds that
+   *Reservation-3* needs to be healed, because it will start in the next
+   interval and the resource has not yet recovered.
+
+6. Before point 5, the manager detects the recovery of the resource.
+
+7. At point 5, the periodic task finds no failed resource and nothing
+   to do.
+
+.. image:: ../images/healing_flow.png
+   :align: center
+   :width: 600 px

 Flags
 =====
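+
+For illustration, a healing handler reports its result as a dictionary
+keyed by reservation ID, mapping to the flags that need updating. The
+sketch below mirrors the flags exercised by the unit tests in this patch;
+the reservation IDs are illustrative only:
+
+.. code-block:: python
+
+    reservation_flags = {
+        'rsrv-1': {'missing_resources': True},   # no substitute host found
+        'rsrv-2': {'resources_changed': True},   # moved to a new host
+    }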