Add periodic healing

This patch adds a periodic healing mechanism to the monitor module and
the monitoring plugins. As part of this change, the heal_reservations()
method of the resource plugins now receives the period to heal
(interval_begin/interval_end arguments).

The goal is to avoid immediately healing (reallocating) all reservations
that use failed resources, because failed resources are expected to
recover at some point in the future. The monitor therefore first heals
only reservations which are active or will start soon; the remaining
reservations are expected to be healed by the periodic healing task.
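In other words, each healing pass only considers a bounded window of
upcoming reservations. A simplified sketch of the window computation
introduced by this patch (mirroring PhysicalHostMonitorPlugin.heal()
below; the helper name is illustrative):

    import datetime

    def healing_window(healing_interval_mins):
        # Heal from "now" up to one healing interval ahead. An interval
        # of 0 means no upper bound: all future reservations are healed
        # in a single pass.
        begin = datetime.datetime.utcnow()
        if healing_interval_mins == 0:
            end = datetime.date.max  # same sentinel used by heal() below
        else:
            end = begin + datetime.timedelta(minutes=healing_interval_mins)
        return begin, end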

Implements: blueprint healing-time
Change-Id: I6971c952fcde101ff2408f567fee9a7dab97b140
Hiroaki Kobayashi 2017-12-21 10:31:46 +09:00
parent ad575d3d32
commit 6d2950f3b0
29 changed files with 1004 additions and 459 deletions

@@ -359,6 +359,12 @@ def reservable_host_get_all_by_queries(queries):
return IMPL.reservable_host_get_all_by_queries(queries)
@to_dict
def unreservable_host_get_all_by_queries(queries):
"""Returns unreservable hosts filtered by an array of queries."""
return IMPL.unreservable_host_get_all_by_queries(queries)
def host_destroy(host_id):
"""Delete specific Compute host."""
IMPL.host_destroy(host_id)

@@ -688,6 +688,20 @@ def reservable_host_get_all_by_queries(queries):
return host_get_all_by_queries(queries)
def unreservable_host_get_all_by_queries(queries):
"""Returns unreservable hosts filtered by an array of queries.
:param queries: array of queries "key op value" where op can be
http://docs.sqlalchemy.org/en/rel_0_7/core/expression_api.html
#sqlalchemy.sql.operators.ColumnOperators
"""
# TODO(hiro-kobayashi): support the expression 'reservable == False'
queries.append('reservable == 0')
return host_get_all_by_queries(queries)
def host_create(values):
values = values.copy()
host = models.ComputeHost()

@@ -99,7 +99,7 @@ class Reservation(mb.BlazarBase):
backref='reservation',
lazy='joined')
computehost_allocations = relationship('ComputeHostAllocation',
-                                           uselist=False,
+                                           uselist=True,
cascade="all,delete",
backref='reservation',
lazy='joined')
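With uselist=True, a reservation now maps to a list of host allocations
instead of a single row. The reworked heal_reservations() methods below
filter that list by failed host; an equivalent standalone helper (the
function name is hypothetical):

    def allocations_on_failed_hosts(reservation, failed_host_ids):
        # reservation['computehost_allocations'] is a list after this change.
        return [alloc for alloc in reservation['computehost_allocations']
                if alloc['compute_host_id'] in failed_host_ids]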

@@ -70,6 +70,20 @@ def get_reservations_by_host_id(host_id, start_date, end_date):
return query.all()
def get_reservations_by_host_ids(host_ids, start_date, end_date):
session = get_session()
border0 = sa.and_(models.Lease.start_date < start_date,
models.Lease.end_date < start_date)
border1 = sa.and_(models.Lease.start_date > end_date,
models.Lease.end_date > end_date)
query = (session.query(models.Reservation).join(models.Lease)
.join(models.ComputeHostAllocation)
.filter(models.ComputeHostAllocation.compute_host_id
.in_(host_ids))
.filter(~sa.or_(border0, border1)))
return query.all()
def get_free_periods(resource_id, start_date, end_date, duration):
"""Returns a list of free periods."""
reserved_periods = get_reserved_periods(resource_id,
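The border0/border1 conditions implement the usual interval-overlap
test: a reservation's lease is selected unless it lies entirely before
start_date or entirely after end_date. The same predicate in plain
Python (a sketch with illustrative names):

    def lease_overlaps(lease_start, lease_end, start_date, end_date):
        entirely_before = lease_start < start_date and lease_end < start_date
        entirely_after = lease_start > end_date and lease_end > end_date
        return not (entirely_before or entirely_after)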

@@ -107,6 +107,10 @@ def get_reservations_by_host_id(host_id, start_date, end_date):
return IMPL.get_reservations_by_host_id(host_id, start_date, end_date)
def get_reservations_by_host_ids(host_ids, start_date, end_date):
return IMPL.get_reservations_by_host_ids(host_ids, start_date, end_date)
def get_free_periods(resource_id, start_date, end_date, duration):
"""Returns a list of free periods."""
return IMPL.get_free_periods(resource_id, start_date, end_date, duration)

@@ -175,3 +175,8 @@ class InvalidRange(exceptions.BlazarException):
class CantUpdateParameter(exceptions.BlazarException):
code = 409
msg_fmt = _("%(param)s cannot be updated")
class InvalidPeriod(exceptions.BlazarException):
code = 400
msg_fmt = _('The end_date must be later than the start_date.')

@@ -11,32 +11,48 @@
# See the License for the specific language governing permissions and
# limitations under the License.
 import abc

 from oslo_log import log as logging
+from oslo_service import threadgroup
 import six

+from blazar.db import api as db_api
+
 LOG = logging.getLogger(__name__)
 @six.add_metaclass(abc.ABCMeta)
 class BaseMonitor(object):
     """Base class for monitoring classes."""

-    @abc.abstractmethod
+    def __init__(self, monitor_plugins):
+        self.monitor_plugins = monitor_plugins
+        self.tg = threadgroup.ThreadGroup()
+        self.healing_timers = []
+
     def start_monitoring(self):
         """Start monitoring."""
-        pass
+        self.start_periodic_healing()

-    @abc.abstractmethod
     def stop_monitoring(self):
         """Stop monitoring."""
-        pass
+        self.stop_periodic_healing()

-    def update_statuses(self, callback, *args, **kwargs):
-        """Update leases and reservations table after executing a callback."""
+    def start_periodic_healing(self):
+        """Start periodic healing process."""
+        for plugin in self.monitor_plugins:
+            healing_interval_mins = plugin.get_healing_interval()
+            if healing_interval_mins > 0:
+                self.healing_timers.append(
+                    self.tg.add_timer(healing_interval_mins * 60,
+                                      self.call_monitor_plugin,
+                                      None,
+                                      plugin.heal))

+    def stop_periodic_healing(self):
+        """Stop periodic healing process."""
+        for timer in self.healing_timers:
+            self.tg.timer_done(timer)
+
+    def call_monitor_plugin(self, callback, *args, **kwargs):
+        """Call a callback and update lease/reservation flags."""
try:
# The callback() has to return a dictionary of
# {reservation id: flags to update}.
@@ -46,17 +62,20 @@ class BaseMonitor(object):
LOG.exception('Caught an exception while executing a callback. '
'%s', str(e))
-        # TODO(hiro-kobayashi): update statuses of related leases and
-        # reservations. Depends on the state-machine blueprint.
+        if reservation_flags:
+            self._update_flags(reservation_flags)

-        # Update flags of related leases and reservations.
+    def _update_flags(self, reservation_flags):
+        """Update lease/reservation flags."""
lease_ids = set([])
for reservation_id, flags in reservation_flags.items():
db_api.reservation_update(reservation_id, flags)
LOG.debug('Reservation %s was updated: %s',
reservation_id, flags)
reservation = db_api.reservation_get(reservation_id)
lease_ids.add(reservation['lease_id'])
for lease_id in lease_ids:
LOG.debug('Lease %s was updated: {"degraded": True}', lease_id)
db_api.lease_update(lease_id, {'degraded': True})
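A note on the scheduling above: ThreadGroup.add_timer() from
oslo_service takes the tick interval in seconds, the callback, an
initial delay, and the callback's arguments, which is why the healing
interval is multiplied by 60 and plugin.heal is passed through to
call_monitor_plugin(). A minimal pure-Python stand-in for this periodic
scheduling (an illustration, not the oslo_service implementation):

    import threading

    def add_periodic(interval_secs, callback, *args):
        # Run callback(*args) every interval_secs seconds until the
        # returned event is set (the rough equivalent of timer_done()).
        done = threading.Event()

        def _loop():
            while not done.wait(interval_secs):
                callback(*args)

        threading.Thread(target=_loop, daemon=True).start()
        return done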

@@ -28,6 +28,7 @@ class NotificationMonitor(base.BaseMonitor):
def __init__(self, monitor_plugins):
"""Initialize a notification monitor."""
LOG.debug('Initializing a notification monitor...')
super(NotificationMonitor, self).__init__(monitor_plugins)
try:
self.handlers = defaultdict(list)
self.listener = oslo_messaging.get_notification_listener(
@@ -46,6 +47,7 @@ class NotificationMonitor(base.BaseMonitor):
LOG.debug('Starting a notification monitor...')
try:
self.listener.start()
super(NotificationMonitor, self).start_monitoring()
except Exception as e:
LOG.exception('Failed to start a notification monitor. (%s)',
str(e))
@@ -55,6 +57,7 @@ class NotificationMonitor(base.BaseMonitor):
LOG.debug('Stopping a notification monitor...')
try:
self.listener.stop()
super(NotificationMonitor, self).stop_monitoring()
except Exception as e:
LOG.exception('Failed to stop a notification monitor. (%s)',
str(e))
@@ -85,9 +88,9 @@ class NotificationMonitor(base.BaseMonitor):
for plugin in monitor_plugins:
for event_type in plugin.get_notification_event_types():
self.handlers[event_type].append(
-                    # Wrap a notification callback with the update_statuses()
-                    # to manage statuses of leases and reservations.
-                    lambda e_type, payload: self.update_statuses(
+                    # Wrap the notification callback with the
+                    # call_monitor_plugin() to manage lease/reservation flags.
+                    lambda e_type, payload: self.call_monitor_plugin(
plugin.notification_callback, e_type, payload))
return [NotificationEndpoint(self)]

@@ -12,7 +12,6 @@
# limitations under the License.
from oslo_log import log as logging
-from oslo_service import threadgroup
from blazar.monitor import base
@@ -24,19 +23,23 @@ class PollingMonitor(base.BaseMonitor):
def __init__(self, monitor_plugins):
"""Initialize a polling monitor."""
-        self.monitor_plugins = monitor_plugins
-        self.tg = threadgroup.ThreadGroup()
         LOG.debug('Initializing a polling monitor...')
+        super(PollingMonitor, self).__init__(monitor_plugins)
+        self.polling_timers = []
def start_monitoring(self):
"""Start polling."""
LOG.debug('Starting a polling monitor...')
try:
for plugin in self.monitor_plugins:
-                # Set poll() timer. The poll() is wrapped with the
-                # update_statuses() to manage statuses of leases and
-                # reservations.
-                self.tg.add_timer(plugin.get_polling_interval(),
-                                  self.update_statuses, 0, plugin.poll)
+                # Set polling timer. Wrap the monitor plugin method with the
+                # call_monitor_plugin() to manage lease/reservation flags.
+                self.polling_timers.append(
+                    self.tg.add_timer(plugin.get_polling_interval(),
+                                      self.call_monitor_plugin, None,
+                                      plugin.poll))
+            super(PollingMonitor, self).start_monitoring()
except Exception as e:
LOG.exception('Failed to start a polling monitor. (%s)',
str(e))
@@ -45,6 +48,8 @@ class PollingMonitor(base.BaseMonitor):
"""Stop polling."""
LOG.debug('Stopping a polling monitor...')
try:
-            self.tg.stop()
+            for timer in self.polling_timers:
+                self.tg.timer_done(timer)
+            super(PollingMonitor, self).stop_monitoring()
except Exception as e:
LOG.exception('Failed to stop a polling monitor. (%s)', str(e))

@@ -86,10 +86,13 @@ class BasePlugin(object):
"""Take actions before the end of a lease"""
pass
-    def heal_reservations(self, failed_resources):
+    def heal_reservations(self, failed_resources, interval_begin,
+                          interval_end):
"""Heal reservations which suffer from resource failures.
:param: failed_resources: failed resources
:param: interval_begin: start date of the period to heal.
:param: interval_end: end date of the period to heal.
:return: a dictionary of {reservation id: flags to update}
e.g. {'de27786d-bd96-46bb-8363-19c13b2c6657':
{'missing_resources': True}}
@@ -148,3 +151,15 @@ class BaseMonitorPlugin():
{'missing_resources': True}}
"""
pass
@abc.abstractmethod
def get_healing_interval(self):
"""Get interval of reservation healing in minutes."""
pass
@abc.abstractmethod
def heal(self):
"""Heal suffering reservations.
:return: a dictionary of {reservation id: flags to update}
"""

@@ -469,68 +469,86 @@ class VirtualInstancePlugin(base.BasePlugin, nova.NovaClientWrapper):
self.cleanup_resources(instance_reservation)
-    def heal_reservations(self, failed_resources):
+    def heal_reservations(self, failed_resources, interval_begin,
+                          interval_end):
         """Heal reservations which suffer from resource failures.

-        :param: failed_resources: a list of failed hosts.
+        :param: failed_resources: failed resources
+        :param: interval_begin: start date of the period to heal.
+        :param: interval_end: end date of the period to heal.
:return: a dictionary of {reservation id: flags to update}
e.g. {'de27786d-bd96-46bb-8363-19c13b2c6657':
{'missing_resources': True}}
"""
reservation_flags = {}
-        failed_allocs = []
-        for host in failed_resources:
-            failed_allocs += db_api.host_allocation_get_all_by_values(
-                compute_host_id=host['id'])
+        host_ids = [h['id'] for h in failed_resources]
+        reservations = db_utils.get_reservations_by_host_ids(host_ids,
+                                                             interval_begin,
+                                                             interval_end)

-        for alloc in failed_allocs:
-            reservation = db_api.reservation_get(alloc['reservation_id'])
+        for reservation in reservations:
if reservation['resource_type'] != RESOURCE_TYPE:
continue
-            pool = None
-            # Remove the failed host from the aggregate.
-            if reservation['status'] == status.reservation.ACTIVE:
-                host = db_api.host_get(alloc['compute_host_id'])
-                pool = nova.ReservationPool()
-                pool.remove_computehost(reservation['aggregate_id'],
-                                        host['service_name'])
-
-            # Allocate alternative resource.
-            values = {}
-            lease = db_api.lease_get(reservation['lease_id'])
-            values['start_date'] = max(datetime.datetime.utcnow(),
-                                       lease['start_date'])
-            values['end_date'] = lease['end_date']
-            specs = ['vcpus', 'memory_mb', 'disk_gb', 'affinity', 'amount']
-            for key in specs:
-                values[key] = reservation[key]
-            changed_hosts = self.pickup_hosts(reservation['id'], values)
-            if len(changed_hosts['added']) == 0:
-                if reservation['id'] not in reservation_flags:
-                    reservation_flags[reservation['id']] = {}
-                reservation_flags[reservation['id']].update(
-                    {'missing_resources': True})
-                db_api.host_allocation_destroy(alloc['id'])
-                LOG.warn('Could not find alternative host for reservation %s '
-                         '(lease: %s).', reservation['id'], lease['name'])
-            else:
-                new_host_id = changed_hosts['added'].pop()
-                db_api.host_allocation_update(
-                    alloc['id'], {'compute_host_id': new_host_id})
-                if reservation['status'] == status.reservation.ACTIVE:
-                    # Add the alternative host into the aggregate.
-                    new_host = db_api.host_get(new_host_id)
-                    pool.add_computehost(reservation['aggregate_id'],
-                                         new_host['service_name'],
-                                         stay_in=True)
-                if reservation['id'] not in reservation_flags:
-                    reservation_flags[reservation['id']] = {}
-                reservation_flags[reservation['id']].update(
-                    {'resources_changed': True})
-                LOG.warn('Resource changed for reservation %s (lease: %s).',
-                         reservation['id'], lease['name'])
+            for allocation in [alloc for alloc
+                               in reservation['computehost_allocations']
+                               if alloc['compute_host_id'] in host_ids]:
+                if self._reallocate(allocation):
+                    if reservation['status'] == status.reservation.ACTIVE:
+                        if reservation['id'] not in reservation_flags:
+                            reservation_flags[reservation['id']] = {}
+                        reservation_flags[reservation['id']].update(
+                            {'resources_changed': True})
+                else:
+                    if reservation['id'] not in reservation_flags:
+                        reservation_flags[reservation['id']] = {}
+                    reservation_flags[reservation['id']].update(
+                        {'missing_resources': True})
return reservation_flags
def _reallocate(self, allocation):
"""Allocate an alternative host.
:param: allocation: allocation to change.
:return: True if an alternative host was successfully allocated.
"""
reservation = db_api.reservation_get(allocation['reservation_id'])
pool = nova.ReservationPool()
# Remove the failed host from the aggregate.
if reservation['status'] == status.reservation.ACTIVE:
host = db_api.host_get(allocation['compute_host_id'])
pool.remove_computehost(reservation['aggregate_id'],
host['service_name'])
# Allocate an alternative host.
values = {}
lease = db_api.lease_get(reservation['lease_id'])
values['start_date'] = max(datetime.datetime.utcnow(),
lease['start_date'])
values['end_date'] = lease['end_date']
specs = ['vcpus', 'memory_mb', 'disk_gb', 'affinity', 'amount']
for key in specs:
values[key] = reservation[key]
changed_hosts = self.pickup_hosts(reservation['id'], values)
if len(changed_hosts['added']) == 0:
db_api.host_allocation_destroy(allocation['id'])
LOG.warn('Could not find alternative host for reservation %s '
'(lease: %s).', reservation['id'], lease['name'])
return False
else:
new_host_id = changed_hosts['added'].pop()
db_api.host_allocation_update(
allocation['id'], {'compute_host_id': new_host_id})
if reservation['status'] == status.reservation.ACTIVE:
# Add the alternative host into the aggregate.
new_host = db_api.host_get(new_host_id)
pool.add_computehost(reservation['aggregate_id'],
new_host['service_name'],
stay_in=True)
LOG.warn('Resource changed for reservation %s (lease: %s).',
reservation['id'], lease['name'])
return True

@@ -66,6 +66,11 @@ plugin_opts = [
cfg.IntOpt('polling_interval',
default=60,
help='Interval (seconds) of polling for health checking.'),
cfg.IntOpt('healing_interval',
default=60,
help='Interval (minutes) of reservation healing. '
'If 0 is specified, the interval is infinite and all the '
'reservations in the future are healed at one time.'),
]
CONF = cfg.CONF
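Operators can tune the healing cadence per resource type: for this
plugin the option lives in the config group named by
plugin.RESOURCE_TYPE, i.e. [physical:host]. An example blazar.conf
snippet (value illustrative):

    [physical:host]
    # Heal every two hours; each pass covers a two-hour window.
    healing_interval = 120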
@@ -210,73 +215,89 @@ class PhysicalHostPlugin(base.BasePlugin, nova.NovaClientWrapper):
except manager_ex.AggregateNotFound:
pass
-    def heal_reservations(self, failed_resources):
+    def heal_reservations(self, failed_resources, interval_begin,
+                          interval_end):
"""Heal reservations which suffer from resource failures.
:param: failed_resources: a list of failed hosts.
:param: interval_begin: start date of the period to heal.
:param: interval_end: end date of the period to heal.
:return: a dictionary of {reservation id: flags to update}
e.g. {'de27786d-bd96-46bb-8363-19c13b2c6657':
{'missing_resources': True}}
"""
reservation_flags = {}
-        failed_allocs = []
-        for host in failed_resources:
-            failed_allocs += db_api.host_allocation_get_all_by_values(
-                compute_host_id=host['id'])
+        host_ids = [h['id'] for h in failed_resources]
+        reservations = db_utils.get_reservations_by_host_ids(host_ids,
+                                                             interval_begin,
+                                                             interval_end)

-        for alloc in failed_allocs:
-            reservation = db_api.reservation_get(alloc['reservation_id'])
+        for reservation in reservations:
if reservation['resource_type'] != plugin.RESOURCE_TYPE:
continue
-            lease = db_api.lease_get(reservation['lease_id'])
-            host_reservation = None
-            pool = None
-            # Remove the failed host from the aggregate.
-            if reservation['status'] == status.reservation.ACTIVE:
-                host = db_api.host_get(alloc['compute_host_id'])
-                host_reservation = db_api.host_reservation_get(
-                    reservation['resource_id'])
-                with trusts.create_ctx_from_trust(lease['trust_id']):
-                    pool = nova.ReservationPool()
-                    pool.remove_computehost(host_reservation['aggregate_id'],
-                                            host['service_name'])
-
-            # Allocate alternative resource.
-            start_date = max(datetime.datetime.utcnow(), lease['start_date'])
-            new_hostids = self._matching_hosts(
-                reservation['hypervisor_properties'],
-                reservation['resource_properties'],
-                '1-1', start_date, lease['end_date']
-            )
-            if not new_hostids:
-                if reservation['id'] not in reservation_flags:
-                    reservation_flags[reservation['id']] = {}
-                reservation_flags[reservation['id']].update(
-                    {'missing_resources': True})
-                db_api.host_allocation_destroy(alloc['id'])
-                LOG.warn('Could not find alternative host for reservation %s '
-                         '(lease: %s).', reservation['id'], lease['name'])
-            else:
-                new_hostid = new_hostids.pop()
-                db_api.host_allocation_update(alloc['id'],
-                                              {'compute_host_id': new_hostid})
-                if reservation['status'] == status.reservation.ACTIVE:
-                    # Add the alternative host into the aggregate.
-                    new_host = db_api.host_get(new_hostid)
-                    with trusts.create_ctx_from_trust(lease['trust_id']):
-                        pool.add_computehost(host_reservation['aggregate_id'],
-                                             new_host['service_name'])
-                if reservation['id'] not in reservation_flags:
-                    reservation_flags[reservation['id']] = {}
-                reservation_flags[reservation['id']].update(
-                    {'resources_changed': True})
-                LOG.warn('Resource changed for reservation %s (lease: %s).',
-                         reservation['id'], lease['name'])
+            for allocation in [alloc for alloc
+                               in reservation['computehost_allocations']
+                               if alloc['compute_host_id'] in host_ids]:
+                if self._reallocate(allocation):
+                    if reservation['status'] == status.reservation.ACTIVE:
+                        if reservation['id'] not in reservation_flags:
+                            reservation_flags[reservation['id']] = {}
+                        reservation_flags[reservation['id']].update(
+                            {'resources_changed': True})
+                else:
+                    if reservation['id'] not in reservation_flags:
+                        reservation_flags[reservation['id']] = {}
+                    reservation_flags[reservation['id']].update(
+                        {'missing_resources': True})
return reservation_flags
def _reallocate(self, allocation):
"""Allocate an alternative host.
:param: allocation: allocation to change.
:return: True if an alternative host was successfully allocated.
"""
reservation = db_api.reservation_get(allocation['reservation_id'])
h_reservation = db_api.host_reservation_get(
reservation['resource_id'])
lease = db_api.lease_get(reservation['lease_id'])
pool = nova.ReservationPool()
# Remove the old host from the aggregate.
if reservation['status'] == status.reservation.ACTIVE:
host = db_api.host_get(allocation['compute_host_id'])
pool.remove_computehost(h_reservation['aggregate_id'],
host['service_name'])
# Allocate an alternative host.
start_date = max(datetime.datetime.utcnow(), lease['start_date'])
new_hostids = self._matching_hosts(
reservation['hypervisor_properties'],
reservation['resource_properties'],
'1-1', start_date, lease['end_date']
)
if not new_hostids:
db_api.host_allocation_destroy(allocation['id'])
LOG.warn('Could not find alternative host for reservation %s '
'(lease: %s).', reservation['id'], lease['name'])
return False
else:
new_hostid = new_hostids.pop()
db_api.host_allocation_update(allocation['id'],
{'compute_host_id': new_hostid})
LOG.warn('Resource changed for reservation %s (lease: %s).',
reservation['id'], lease['name'])
if reservation['status'] == status.reservation.ACTIVE:
# Add the alternative host into the aggregate.
new_host = db_api.host_get(new_hostid)
pool.add_computehost(h_reservation['aggregate_id'],
new_host['service_name'])
return True
def _get_extra_capabilities(self, host_id):
extra_capabilities = {}
raw_extra_capabilities = (
@@ -754,8 +775,31 @@ class PhysicalHostMonitorPlugin(base.BaseMonitorPlugin,
host['hypervisor_hostname'], str(e))
# Heal related reservations
return self.heal()
def get_healing_interval(self):
"""Get interval of reservation healing in minutes."""
return CONF[plugin.RESOURCE_TYPE].healing_interval
def heal(self):
"""Heal suffering reservations in the next healing interval.
:return: a dictionary of {reservation id: flags to update}
"""
reservation_flags = {}
hosts = db_api.unreservable_host_get_all_by_queries([])
interval_begin = datetime.datetime.utcnow()
interval = self.get_healing_interval()
if interval == 0:
interval_end = datetime.date.max
else:
interval_end = interval_begin + datetime.timedelta(
minutes=interval)
for handler in self.healing_handlers:
-            reservation_flags.update(handler(failed_hosts))
+            reservation_flags.update(handler(hosts,
+                                             interval_begin,
+                                             interval_end))
return reservation_flags
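As a concrete trace of heal() (times assumed for illustration, matching
the monitor plugin unit test near the end of this commit): with
healing_interval = 60 and utcnow() returning 2020-01-01 12:00, every
healing handler is invoked with a one-hour window:

    import datetime

    interval_begin = datetime.datetime(2020, 1, 1, 12, 0)
    interval_end = interval_begin + datetime.timedelta(minutes=60)
    # each handler runs as: handler(unreservable_hosts,
    #                               interval_begin, interval_end)
    # with healing_interval = 0 the window end is datetime.date.max instead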

@@ -110,9 +110,12 @@ class SQLAlchemyDBUtilsTestCase(tests.DBTestCase):
_create_physical_lease(values=r2)
_create_physical_lease(values=r3)
-    def check_reservation(self, expect, host_id, start, end):
+    def check_reservation(self, expect, host_ids, start, end):
         expect.sort(key=lambda x: x['lease_id'])
-        ret = db_utils.get_reservations_by_host_id(host_id, start, end)
+        if isinstance(host_ids, list):
+            ret = db_utils.get_reservations_by_host_ids(host_ids, start, end)
+        else:
+            ret = db_utils.get_reservations_by_host_id(host_ids, start, end)
for i, res in enumerate(sorted(ret, key=lambda x: x['lease_id'])):
self.assertEqual(expect[i]['lease_id'], res['lease_id'])
@@ -377,5 +380,24 @@ class SQLAlchemyDBUtilsTestCase(tests.DBTestCase):
self.check_reservation(expected, 'r1',
'2030-01-01 08:00', '2030-01-01 17:00')
def test_get_reservations_by_host_ids(self):
self._setup_leases()
self.check_reservation([], ['r1', 'r2'],
'2030-01-01 07:00', '2030-01-01 08:59')
ret = db_api.reservation_get_all_by_lease_id('lease1')
self.check_reservation(ret, ['r1', 'r2'],
'2030-01-01 08:00', '2030-01-01 10:00')
ret = db_api.reservation_get_all_by_lease_id('lease1')
ret.extend(db_api.reservation_get_all_by_lease_id('lease2'))
ret.extend(db_api.reservation_get_all_by_lease_id('lease3'))
self.check_reservation(ret, ['r1', 'r2'],
'2030-01-01 08:00', '2030-01-01 15:30')
self.check_reservation([], ['r4'],
'2030-01-01 07:00', '2030-01-01 15:00')
# TODO(frossigneux) longest_availability
# TODO(frossigneux) shortest_availability

@@ -0,0 +1,102 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import mock
from oslo_service import threadgroup
from blazar.db import api as db_api
from blazar.monitor import base as base_monitor
from blazar.plugins import base
from blazar import tests
HEALING_INTERVAL = 10
class DummyMonitorPlugin(base.BaseMonitorPlugin):
def is_notification_enabled(self):
return True
def get_notification_event_types(self):
return []
def get_notification_topics(self):
return []
def notification_callback(self, event_type, message):
return {}
def is_polling_enabled(self):
return False
def get_polling_interval(self):
return 0
def poll(self):
return {}
def get_healing_interval(self):
return HEALING_INTERVAL
def heal(self):
return {}
class BaseMonitorTestCase(tests.TestCase):
def setUp(self):
super(BaseMonitorTestCase, self).setUp()
self.monitor_plugins = [DummyMonitorPlugin()]
self.monitor = base_monitor.BaseMonitor(self.monitor_plugins)
def test_start_periodic_healing(self):
add_timer = self.patch(threadgroup.ThreadGroup, 'add_timer')
self.monitor.start_periodic_healing()
add_timer.assert_called_once_with(
HEALING_INTERVAL * 60, self.monitor.call_monitor_plugin, None,
self.monitor_plugins[0].heal)
def test_stop_periodic_healing(self):
dummy_timer = mock.Mock()
timer_done = self.patch(threadgroup.ThreadGroup, 'timer_done')
self.monitor.healing_timers.append(dummy_timer)
self.monitor.stop_monitoring()
timer_done.assert_called_once_with(dummy_timer)
def test_call_monitor_plugin(self):
callback = self.patch(DummyMonitorPlugin,
'notification_callback')
callback.return_value = {
'dummy_id1': {'missing_resources': True}
}
update_flags = self.patch(self.monitor, '_update_flags')
self.monitor.call_monitor_plugin(callback, 'event_type1', 'hello')
callback.assert_called_once_with('event_type1', 'hello')
update_flags.assert_called_once_with(
{'dummy_id1': {'missing_resources': True}})
def test_call_update_flags(self):
reservation_update = self.patch(db_api, 'reservation_update')
reservation_get = self.patch(db_api, 'reservation_get')
reservation_get.return_value = {
'lease_id': 'dummy_id2'
}
lease_update = self.patch(db_api, 'lease_update')
self.monitor._update_flags({'dummy_id1': {'missing_resources': True}})
reservation_update.assert_called_once_with(
'dummy_id1', {'missing_resources': True})
reservation_get.assert_called_once_with('dummy_id1')
lease_update.assert_called_once_with('dummy_id2',
{'degraded': True})

@@ -9,10 +9,8 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import oslo_messaging
-from blazar.db import api as db_api
from blazar.monitor import notification_monitor
from blazar.plugins import base
from blazar import tests
@@ -40,6 +38,12 @@ class DummyMonitorPlugin(base.BaseMonitorPlugin):
def poll(self):
return {}
def get_healing_interval(self):
return 0
def heal(self):
return {}
class NotificationMonitorTestCase(tests.TestCase):
def setUp(self):
@@ -68,27 +72,6 @@ class NotificationMonitorTestCase(tests.TestCase):
self.monitor._get_endpoints(self.plugins)
endpoint.assert_called_once()
-    def test_update_statuses(self):
-        callback = self.patch(DummyMonitorPlugin,
-                              'notification_callback')
-        callback.return_value = {
-            'dummy_id1': {'missing_resources': True}
-        }
-        reservation_update = self.patch(db_api, 'reservation_update')
-        reservation_get = self.patch(db_api, 'reservation_get')
-        reservation_get.return_value = {
-            'lease_id': 'dummy_id2'
-        }
-        lease_update = self.patch(db_api, 'lease_update')
-        self.monitor.update_statuses(callback, 'event_type1', 'hello')
-        callback.assert_called_once_with('event_type1', 'hello')
-        reservation_update.assert_called_once_with(
-            'dummy_id1', {'missing_resources': True})
-        reservation_get.assert_called_once_with('dummy_id1')
-        lease_update.assert_called_once_with('dummy_id2',
-                                             {'degraded': True})
class NotificationEndpointTestCase(tests.TestCase):
def setUp(self):

@@ -10,13 +10,19 @@
# License for the specific language governing permissions and limitations
# under the License.
import mock
from oslo_service import threadgroup
from blazar.monitor import base as base_monitor
from blazar.monitor import polling_monitor
from blazar.plugins import base
from blazar import tests
POLLING_INTERVAL = 10
HEALING_INTERVAL = 10
class DummyMonitorPlugin(base.BaseMonitorPlugin):
def is_notification_enabled(self):
return True
@@ -34,11 +40,17 @@ class DummyMonitorPlugin(base.BaseMonitorPlugin):
return False
def get_polling_interval(self):
-        return 0
+        return POLLING_INTERVAL
def poll(self):
return {}
def get_healing_interval(self):
return HEALING_INTERVAL
def heal(self):
return {}
class PollingHandlerTestCase(tests.TestCase):
def setUp(self):
@@ -48,14 +60,18 @@ class PollingHandlerTestCase(tests.TestCase):
def test_start_monitoring(self):
add_timer = self.patch(threadgroup.ThreadGroup, 'add_timer')
+        self.patch(base_monitor.BaseMonitor, 'start_monitoring')
         self.monitor.start_monitoring()
         add_timer.assert_called_once_with(
-            self.monitor_plugins[0].get_polling_interval(),
-            self.monitor.update_statuses, 0, self.monitor_plugins[0].poll)
+            POLLING_INTERVAL, self.monitor.call_monitor_plugin, None,
+            self.monitor_plugins[0].poll)
def test_stop_monitoring(self):
-        stop = self.patch(threadgroup.ThreadGroup, 'stop')
+        dummy_timer = mock.Mock()
+        timer_done = self.patch(threadgroup.ThreadGroup, 'timer_done')
+        self.monitor.polling_timers.append(dummy_timer)
+        self.patch(base_monitor.BaseMonitor, 'stop_monitoring')
         self.monitor.stop_monitoring()
-        stop.assert_called_once()
+        timer_done.assert_called_once_with(dummy_timer)

@@ -835,16 +835,8 @@ class TestVirtualInstancePlugin(tests.TestCase):
def test_heal_reservations_before_start_and_resources_changed(self):
plugin = instance_plugin.VirtualInstancePlugin()
-        failed_hosts = [{'id': 1}]
-        new_host_ids = [2]
-        alloc_get = self.patch(db_api,
-                               'host_allocation_get_all_by_values')
-        alloc_get.return_value = [{'id': 'alloc-1',
-                                   'compute_host_id': '1',
-                                   'reservation_id': 'rsrv-1'}]
-        alloc_destroy = self.patch(db_api, 'host_allocation_destroy')
-        reservation_get = self.patch(db_api, 'reservation_get')
-        reservation_get.return_value = {
+        failed_host = {'id': '1'}
+        dummy_reservation = {
'id': 'rsrv-1',
'resource_type': instance_plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
@@ -854,43 +846,30 @@
'disk_gb': 256,
'aggregate_id': 'agg-1',
'affinity': False,
-            'amount': 3}
-        host_get = self.patch(db_api, 'host_get')
-        host_get.return_value = {'service_name': 'compute'}
-        mock_pool = self.patch(nova, 'ReservationPool')
-        mock_pool.return_value = mock.MagicMock()
-        lease_get = self.patch(db_api, 'lease_get')
-        lease_get.return_value = {
-            'name': 'lease-name',
-            'start_date': datetime.datetime(2020, 1, 1, 12, 00),
-            'end_date': datetime.datetime(2020, 1, 2, 12, 00)}
-        pickup_hosts = self.patch(plugin, 'pickup_hosts')
-        pickup_hosts.return_value = {'added': new_host_ids, 'removed': []}
-        alloc_update = self.patch(db_api, 'host_allocation_update')
+            'amount': 3,
+            'computehost_allocations': [{
+                'id': 'alloc-1', 'compute_host_id': failed_host['id'],
+                'reservation_id': 'rsrv-1'
+            }]
+        }
+        get_reservations = self.patch(db_utils,
+                                      'get_reservations_by_host_ids')
+        get_reservations.return_value = [dummy_reservation]
+        reallocate = self.patch(plugin, '_reallocate')
+        reallocate.return_value = True
-        with mock.patch.object(datetime, 'datetime',
-                               mock.Mock(wraps=datetime.datetime)) as patched:
-            patched.utcnow.return_value = datetime.datetime(2020, 1, 1,
-                                                            11, 00)
-            result = plugin.heal_reservations(failed_hosts)
-        alloc_destroy.assert_not_called()
-        pickup_hosts.assert_called_once()
-        alloc_update.assert_called_once_with('alloc-1',
-                                             {'compute_host_id': 2})
+        result = plugin.heal_reservations(
+            [failed_host],
+            datetime.datetime(2020, 1, 1, 12, 00),
+            datetime.datetime(2020, 1, 1, 13, 00))
+        reallocate.assert_called_once_with(
+            dummy_reservation['computehost_allocations'][0])
         self.assertEqual({}, result)
def test_heal_reservations_before_start_and_missing_resources(self):
plugin = instance_plugin.VirtualInstancePlugin()
-        failed_hosts = [{'id': 1}]
-        new_host_ids = []
-        alloc_get = self.patch(db_api,
-                               'host_allocation_get_all_by_values')
-        alloc_get.return_value = [{'id': 'alloc-1',
-                                   'compute_host_id': '1',
-                                   'reservation_id': 'rsrv-1'}]
-        alloc_destroy = self.patch(db_api, 'host_allocation_destroy')
-        reservation_get = self.patch(db_api, 'reservation_get')
-        reservation_get.return_value = {
+        failed_host = {'id': '1'}
+        dummy_reservation = {
'id': 'rsrv-1',
'resource_type': instance_plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
@@ -900,42 +879,32 @@
'disk_gb': 256,
'aggregate_id': 'agg-1',
'affinity': False,
-            'amount': 3}
-        host_get = self.patch(db_api, 'host_get')
-        host_get.return_value = {'service_name': 'compute'}
-        mock_pool = self.patch(nova, 'ReservationPool')
-        mock_pool.return_value = mock.MagicMock()
-        lease_get = self.patch(db_api, 'lease_get')
-        lease_get.return_value = {
-            'name': 'lease-name',
-            'start_date': datetime.datetime(2020, 1, 1, 12, 00),
-            'end_date': datetime.datetime(2020, 1, 2, 12, 00)}
-        pickup_hosts = self.patch(plugin, 'pickup_hosts')
-        pickup_hosts.return_value = {'added': new_host_ids, 'removed': []}
-        alloc_update = self.patch(db_api, 'host_allocation_update')
+            'amount': 3,
+            'computehost_allocations': [{
+                'id': 'alloc-1', 'compute_host_id': failed_host['id'],
+                'reservation_id': 'rsrv-1'
+            }]
+        }
+        get_reservations = self.patch(db_utils,
+                                      'get_reservations_by_host_ids')
+        get_reservations.return_value = [dummy_reservation]
+        reallocate = self.patch(plugin, '_reallocate')
+        reallocate.return_value = False
-        with mock.patch.object(datetime, 'datetime',
-                               mock.Mock(wraps=datetime.datetime)) as patched:
-            patched.utcnow.return_value = datetime.datetime(2020, 1, 1,
-                                                            11, 00)
-            result = plugin.heal_reservations(failed_hosts)
-        alloc_destroy.assert_called_once_with('alloc-1')
-        pickup_hosts.assert_called_once()
-        alloc_update.assert_not_called()
-        self.assertEqual({'rsrv-1': {'missing_resources': True}}, result)
+        result = plugin.heal_reservations(
+            [failed_host],
+            datetime.datetime(2020, 1, 1, 12, 00),
+            datetime.datetime(2020, 1, 1, 13, 00))
+        reallocate.assert_called_once_with(
+            dummy_reservation['computehost_allocations'][0])
+        self.assertEqual(
+            {dummy_reservation['id']: {'missing_resources': True}},
+            result)
def test_heal_active_reservations_and_resources_changed(self):
plugin = instance_plugin.VirtualInstancePlugin()
-        failed_hosts = [{'id': 1}]
-        new_host_ids = [2]
-        alloc_get = self.patch(db_api,
-                               'host_allocation_get_all_by_values')
-        alloc_get.return_value = [{'id': 'alloc-1',
-                                   'compute_host_id': '1',
-                                   'reservation_id': 'rsrv-1'}]
-        alloc_destroy = self.patch(db_api, 'host_allocation_destroy')
-        reservation_get = self.patch(db_api, 'reservation_get')
-        reservation_get.return_value = {
+        failed_host = {'id': '1'}
+        dummy_reservation = {
'id': 'rsrv-1',
'resource_type': instance_plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
@@ -945,47 +914,32 @@
'disk_gb': 256,
'aggregate_id': 'agg-1',
'affinity': False,
-            'amount': 3}
-        host_get = self.patch(db_api, 'host_get')
-        host_get.return_value = {'service_name': 'compute'}
-        fake_pool = mock.MagicMock()
-        mock_pool = self.patch(nova, 'ReservationPool')
-        mock_pool.return_value = fake_pool
-        lease_get = self.patch(db_api, 'lease_get')
-        lease_get.return_value = {
-            'name': 'lease-name',
-            'start_date': datetime.datetime(2020, 1, 1, 12, 00),
-            'end_date': datetime.datetime(2020, 1, 2, 12, 00)}
-        pickup_hosts = self.patch(plugin, 'pickup_hosts')
-        pickup_hosts.return_value = {'added': new_host_ids, 'removed': []}
-        alloc_update = self.patch(db_api, 'host_allocation_update')
+            'amount': 3,
+            'computehost_allocations': [{
+                'id': 'alloc-1', 'compute_host_id': failed_host['id'],
+                'reservation_id': 'rsrv-1'
+            }]
+        }
+        get_reservations = self.patch(db_utils,
+                                      'get_reservations_by_host_ids')
+        get_reservations.return_value = [dummy_reservation]
+        reallocate = self.patch(plugin, '_reallocate')
+        reallocate.return_value = True
-        with mock.patch.object(datetime, 'datetime',
-                               mock.Mock(wraps=datetime.datetime)) as patched:
-            patched.utcnow.return_value = datetime.datetime(2020, 1, 1,
-                                                            13, 00)
-            result = plugin.heal_reservations(failed_hosts)
-        alloc_destroy.assert_not_called()
-        pickup_hosts.assert_called_once()
-        alloc_update.assert_called_once_with('alloc-1',
-                                             {'compute_host_id': 2})
-        fake_pool.add_computehost.assert_called_once_with('agg-1',
-                                                          'compute',
-                                                          stay_in=True)
-        self.assertEqual({'rsrv-1': {'resources_changed': True}}, result)
+        result = plugin.heal_reservations(
+            [failed_host],
+            datetime.datetime(2020, 1, 1, 12, 00),
+            datetime.datetime(2020, 1, 1, 13, 00))
+        reallocate.assert_called_once_with(
+            dummy_reservation['computehost_allocations'][0])
+        self.assertEqual(
+            {dummy_reservation['id']: {'resources_changed': True}},
+            result)
def test_heal_active_reservations_and_missing_resources(self):
plugin = instance_plugin.VirtualInstancePlugin()
-        failed_hosts = [{'id': 1}]
-        new_host_ids = []
-        alloc_get = self.patch(db_api,
-                               'host_allocation_get_all_by_values')
-        alloc_get.return_value = [{'id': 'alloc-1',
-                                   'compute_host_id': '1',
-                                   'reservation_id': 'rsrv-1'}]
-        alloc_destroy = self.patch(db_api, 'host_allocation_destroy')
-        reservation_get = self.patch(db_api, 'reservation_get')
-        reservation_get.return_value = {
+        failed_host = {'id': '1'}
+        dummy_reservation = {
'id': 'rsrv-1',
'resource_type': instance_plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
@@ -995,27 +949,177 @@
'disk_gb': 256,
'aggregate_id': 'agg-1',
'affinity': False,
-            'amount': 3}
-        host_get = self.patch(db_api, 'host_get')
-        host_get.return_value = {'service_name': 'compute'}
-        fake_pool = mock.MagicMock()
-        mock_pool = self.patch(nova, 'ReservationPool')
-        mock_pool.return_value = fake_pool
-        lease_get = self.patch(db_api, 'lease_get')
-        lease_get.return_value = {
+            'amount': 3,
+            'computehost_allocations': [{
+                'id': 'alloc-1', 'compute_host_id': failed_host['id'],
+                'reservation_id': 'rsrv-1'
+            }]
+        }
+        get_reservations = self.patch(db_utils,
+                                      'get_reservations_by_host_ids')
+        get_reservations.return_value = [dummy_reservation]
+        reallocate = self.patch(plugin, '_reallocate')
+        reallocate.return_value = False
result = plugin.heal_reservations(
[failed_host],
datetime.datetime(2020, 1, 1, 12, 00),
datetime.datetime(2020, 1, 1, 13, 00))
reallocate.assert_called_once_with(
dummy_reservation['computehost_allocations'][0])
self.assertEqual(
{dummy_reservation['id']: {'missing_resources': True}},
result)
def test_reallocate_before_start(self):
plugin = instance_plugin.VirtualInstancePlugin()
failed_host = {'id': '1'}
new_host = {'id': '2'}
dummy_allocation = {
'id': 'alloc-1',
'compute_host_id': failed_host['id'],
'reservation_id': 'rsrv-1'
}
dummy_reservation = {
'id': 'rsrv-1',
'resource_type': instance_plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
'status': 'pending',
'vcpus': 2,
'memory_mb': 1024,
'disk_gb': 256,
'aggregate_id': 'agg-1',
'affinity': False,
'amount': 3
}
dummy_lease = {
'name': 'lease-name',
'start_date': datetime.datetime(2020, 1, 1, 12, 00),
-            'end_date': datetime.datetime(2020, 1, 2, 12, 00)}
+            'end_date': datetime.datetime(2020, 1, 2, 12, 00),
+            'trust_id': 'trust-1'
+        }
reservation_get = self.patch(db_api, 'reservation_get')
reservation_get.return_value = dummy_reservation
lease_get = self.patch(db_api, 'lease_get')
lease_get.return_value = dummy_lease
pickup_hosts = self.patch(plugin, 'pickup_hosts')
-        pickup_hosts.return_value = {'added': new_host_ids, 'removed': []}
+        pickup_hosts.return_value = {'added': [new_host['id']], 'removed': []}
alloc_update = self.patch(db_api, 'host_allocation_update')
with mock.patch.object(datetime, 'datetime',
mock.Mock(wraps=datetime.datetime)) as patched:
-            patched.utcnow.return_value = datetime.datetime(2020, 1, 1,
-                                                            13, 00)
-            result = plugin.heal_reservations(failed_hosts)
-        alloc_destroy.assert_called_once_with('alloc-1')
+            patched.utcnow.return_value = datetime.datetime(
+                2020, 1, 1, 11, 00)
+            result = plugin._reallocate(dummy_allocation)
         pickup_hosts.assert_called_once()
-        alloc_update.assert_not_called()
-        self.assertEqual({'rsrv-1': {'missing_resources': True}}, result)
+        alloc_update.assert_called_once_with(
+            dummy_allocation['id'],
+            {'compute_host_id': new_host['id']})
+        self.assertEqual(True, result)
def test_reallocate_active(self):
plugin = instance_plugin.VirtualInstancePlugin()
failed_host = {'id': '1',
'service_name': 'compute-1'}
new_host = {'id': '2',
'service_name': 'compute-2'}
dummy_allocation = {
'id': 'alloc-1',
'compute_host_id': failed_host['id'],
'reservation_id': 'rsrv-1'
}
dummy_reservation = {
'id': 'rsrv-1',
'resource_type': instance_plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
'status': 'active',
'vcpus': 2,
'memory_mb': 1024,
'disk_gb': 256,
'aggregate_id': 'agg-1',
'affinity': False,
'amount': 3
}
dummy_lease = {
'name': 'lease-name',
'start_date': datetime.datetime(2020, 1, 1, 12, 00),
'end_date': datetime.datetime(2020, 1, 2, 12, 00),
'trust_id': 'trust-1'
}
reservation_get = self.patch(db_api, 'reservation_get')
reservation_get.return_value = dummy_reservation
lease_get = self.patch(db_api, 'lease_get')
lease_get.return_value = dummy_lease
host_get = self.patch(db_api, 'host_get')
host_get.return_value = failed_host
fake_pool = mock.MagicMock()
mock_pool = self.patch(nova, 'ReservationPool')
mock_pool.return_value = fake_pool
pickup_hosts = self.patch(plugin, 'pickup_hosts')
pickup_hosts.return_value = {'added': [new_host['id']], 'removed': []}
alloc_update = self.patch(db_api, 'host_allocation_update')
with mock.patch.object(datetime, 'datetime',
mock.Mock(wraps=datetime.datetime)) as patched:
patched.utcnow.return_value = datetime.datetime(
2020, 1, 1, 13, 00)
result = plugin._reallocate(dummy_allocation)
fake_pool.remove_computehost.assert_called_once_with(
dummy_reservation['aggregate_id'],
failed_host['service_name'])
pickup_hosts.assert_called_once()
alloc_update.assert_called_once_with(
dummy_allocation['id'],
{'compute_host_id': new_host['id']})
fake_pool.add_computehost.assert_called_once_with(
dummy_reservation['aggregate_id'],
failed_host['service_name'],
stay_in=True)
self.assertEqual(True, result)
def test_reallocate_missing_resources(self):
plugin = instance_plugin.VirtualInstancePlugin()
failed_host = {'id': '1',
'service_name': 'compute-1'}
dummy_allocation = {
'id': 'alloc-1',
'compute_host_id': failed_host['id'],
'reservation_id': 'rsrv-1'
}
dummy_reservation = {
'id': 'rsrv-1',
'resource_type': instance_plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
'status': 'pending',
'vcpus': 2,
'memory_mb': 1024,
'disk_gb': 256,
'aggregate_id': 'agg-1',
'affinity': False,
'amount': 3
}
dummy_lease = {
'name': 'lease-name',
'start_date': datetime.datetime(2020, 1, 1, 12, 00),
'end_date': datetime.datetime(2020, 1, 2, 12, 00),
'trust_id': 'trust-1'
}
reservation_get = self.patch(db_api, 'reservation_get')
reservation_get.return_value = dummy_reservation
lease_get = self.patch(db_api, 'lease_get')
lease_get.return_value = dummy_lease
pickup_hosts = self.patch(plugin, 'pickup_hosts')
pickup_hosts.return_value = {'added': [], 'removed': []}
alloc_destroy = self.patch(db_api, 'host_allocation_destroy')
with mock.patch.object(datetime, 'datetime',
mock.Mock(wraps=datetime.datetime)) as patched:
patched.utcnow.return_value = datetime.datetime(
2020, 1, 1, 11, 00)
result = plugin._reallocate(dummy_allocation)
pickup_hosts.assert_called_once()
alloc_destroy.assert_called_once_with(dummy_allocation['id'])
self.assertEqual(False, result)

@@ -1380,185 +1380,290 @@ class PhysicalHostPluginTestCase(tests.TestCase):
delete_pool.assert_called_with(1)
def test_heal_reservations_before_start_and_resources_changed(self):
-        failed_hosts = [{'id': '1'}]
-        new_hostid = '2'
-        alloc_get = self.patch(self.db_api,
-                               'host_allocation_get_all_by_values')
-        alloc_get.return_value = [{'id': 'alloc-1',
-                                   'compute_host_id': '1',
-                                   'reservation_id': 'rsrv-1'}]
-        alloc_destroy = self.patch(self.db_api, 'host_allocation_destroy')
-        reservation_get = self.patch(self.db_api, 'reservation_get')
-        reservation_get.return_value = {'id': 'rsrv-1',
-                                        'resource_type': plugin.RESOURCE_TYPE,
-                                        'lease_id': 'lease-1',
-                                        'status': 'pending',
-                                        'hypervisor_properties': [],
-                                        'resource_properties': [],
-                                        'resource_id': 'resource-1'}
-        host_get = self.patch(self.db_api, 'host_get')
-        host_get.return_value = {'service_name': 'compute'}
-        host_reservation_get = self.patch(self.db_api, 'host_reservation_get')
-        host_reservation_get.return_value = {'aggregate_id': 1}
-        lease_get = self.patch(self.db_api, 'lease_get')
-        lease_get.return_value = {
-            'name': 'lease-name',
-            'start_date': datetime.datetime(2020, 1, 1, 12, 00),
-            'end_date': datetime.datetime(2020, 1, 2, 12, 00),
-            'trust_id': 'trust-1'}
-        matching_hosts = self.patch(host_plugin.PhysicalHostPlugin,
-                                    '_matching_hosts')
-        matching_hosts.return_value = [new_hostid]
-        alloc_update = self.patch(self.db_api, 'host_allocation_update')
+        failed_host = {'id': '1'}
+        dummy_reservation = {
'id': 'rsrv-1',
'resource_type': plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
'status': 'pending',
'hypervisor_properties': [],
'resource_properties': [],
'resource_id': 'resource-1',
'computehost_allocations': [{
'id': 'alloc-1', 'compute_host_id': failed_host['id'],
'reservation_id': 'rsrv-1'
}]
}
get_reservations = self.patch(self.db_utils,
'get_reservations_by_host_ids')
get_reservations.return_value = [dummy_reservation]
reallocate = self.patch(self.fake_phys_plugin, '_reallocate')
reallocate.return_value = True
-        with mock.patch.object(datetime, 'datetime',
-                               mock.Mock(wraps=datetime.datetime)) as patched:
-            patched.utcnow.return_value = datetime.datetime(2020, 1, 1,
-                                                            11, 00)
-            result = self.fake_phys_plugin.heal_reservations(failed_hosts)
-        alloc_destroy.assert_not_called()
-        matching_hosts.assert_called_once_with(
-            [], [], '1-1',
-            datetime.datetime(2020, 1, 1, 12, 00),
-            datetime.datetime(2020, 1, 2, 12, 00))
-        alloc_update.assert_called_once_with('alloc-1',
-                                             {'compute_host_id': new_hostid})
+        result = self.fake_phys_plugin.heal_reservations(
+            [failed_host],
+            datetime.datetime(2020, 1, 1, 12, 00),
+            datetime.datetime(2020, 1, 1, 13, 00))
+        reallocate.assert_called_once_with(
+            dummy_reservation['computehost_allocations'][0])
         self.assertEqual({}, result)
def test_heal_reservations_before_start_and_missing_resources(self):
-        failed_hosts = [{'id': '1'}]
-        alloc_get = self.patch(self.db_api,
-                               'host_allocation_get_all_by_values')
-        alloc_get.return_value = [{'id': 'alloc-1',
-                                   'compute_host_id': '1',
-                                   'reservation_id': 'rsrv-1'}]
-        alloc_destroy = self.patch(self.db_api, 'host_allocation_destroy')
-        reservation_get = self.patch(self.db_api, 'reservation_get')
-        reservation_get.return_value = {'id': 'rsrv-1',
-                                        'resource_type': plugin.RESOURCE_TYPE,
-                                        'lease_id': 'lease-1',
-                                        'status': 'pending',
-                                        'hypervisor_properties': [],
-                                        'resource_properties': [],
-                                        'resource_id': 'resource-1'}
-        host_get = self.patch(self.db_api, 'host_get')
-        host_get.return_value = {'service_name': 'compute'}
-        host_reservation_get = self.patch(self.db_api, 'host_reservation_get')
-        host_reservation_get.return_value = {'aggregate_id': 1}
-        lease_get = self.patch(self.db_api, 'lease_get')
-        lease_get.return_value = {
-            'name': 'lease-name',
-            'start_date': datetime.datetime(2020, 1, 1, 12, 00),
-            'end_date': datetime.datetime(2020, 1, 2, 12, 00),
-            'trust_id': 'trust-1'}
-        matching_hosts = self.patch(host_plugin.PhysicalHostPlugin,
-                                    '_matching_hosts')
-        matching_hosts.return_value = []
-        alloc_update = self.patch(self.db_api, 'host_allocation_update')
+        failed_host = {'id': '1'}
+        dummy_reservation = {
'id': 'rsrv-1',
'resource_type': plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
'status': 'pending',
'hypervisor_properties': [],
'resource_properties': [],
'resource_id': 'resource-1',
'computehost_allocations': [{
'id': 'alloc-1', 'compute_host_id': failed_host['id'],
'reservation_id': 'rsrv-1'
}]
}
get_reservations = self.patch(self.db_utils,
'get_reservations_by_host_ids')
get_reservations.return_value = [dummy_reservation]
reallocate = self.patch(self.fake_phys_plugin, '_reallocate')
reallocate.return_value = False
-        with mock.patch.object(datetime, 'datetime',
-                               mock.Mock(wraps=datetime.datetime)) as patched:
-            patched.utcnow.return_value = datetime.datetime(2020, 1, 1,
-                                                            11, 00)
-            result = self.fake_phys_plugin.heal_reservations(failed_hosts)
-        alloc_destroy.assert_called_once_with('alloc-1')
-        matching_hosts.assert_called_once_with(
-            [], [], '1-1',
-            datetime.datetime(2020, 1, 1, 12, 00),
-            datetime.datetime(2020, 1, 2, 12, 00))
-        alloc_update.assert_not_called()
-        self.assertEqual({'rsrv-1': {'missing_resources': True}}, result)
+        result = self.fake_phys_plugin.heal_reservations(
+            [failed_host],
+            datetime.datetime(2020, 1, 1, 12, 00),
+            datetime.datetime(2020, 1, 1, 13, 00))
+        reallocate.assert_called_once_with(
+            dummy_reservation['computehost_allocations'][0])
+        self.assertEqual(
+            {dummy_reservation['id']: {'missing_resources': True}},
+            result)
def test_heal_active_reservations_and_resources_changed(self):
-        failed_hosts = [{'id': '1'}]
-        new_hostid = '2'
-        alloc_get = self.patch(self.db_api,
-                               'host_allocation_get_all_by_values')
-        alloc_get.return_value = [{'id': 'alloc-1',
-                                   'compute_host_id': '1',
-                                   'reservation_id': 'rsrv-1'}]
-        alloc_destroy = self.patch(self.db_api, 'host_allocation_destroy')
-        reservation_get = self.patch(self.db_api, 'reservation_get')
-        reservation_get.return_value = {'id': 'rsrv-1',
-                                        'resource_type': plugin.RESOURCE_TYPE,
-                                        'lease_id': 'lease-1',
-                                        'status': 'active',
-                                        'hypervisor_properties': [],
-                                        'resource_properties': [],
-                                        'resource_id': 'resource-1'}
-        host_get = self.patch(self.db_api, 'host_get')
-        host_get.return_value = {'service_name': 'compute'}
-        host_reservation_get = self.patch(self.db_api, 'host_reservation_get')
-        host_reservation_get.return_value = {'aggregate_id': 1}
-        lease_get = self.patch(self.db_api, 'lease_get')
-        lease_get.return_value = {
+        failed_host = {'id': '1'}
+        dummy_reservation = {
'id': 'rsrv-1',
'resource_type': plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
'status': 'active',
'hypervisor_properties': [],
'resource_properties': [],
'resource_id': 'resource-1',
'computehost_allocations': [{
'id': 'alloc-1', 'compute_host_id': failed_host['id'],
'reservation_id': 'rsrv-1'
}]
}
get_reservations = self.patch(self.db_utils,
'get_reservations_by_host_ids')
get_reservations.return_value = [dummy_reservation]
reallocate = self.patch(self.fake_phys_plugin, '_reallocate')
reallocate.return_value = True
result = self.fake_phys_plugin.heal_reservations(
[failed_host],
datetime.datetime(2020, 1, 1, 12, 00),
datetime.datetime(2020, 1, 1, 13, 00))
reallocate.assert_called_once_with(
dummy_reservation['computehost_allocations'][0])
self.assertEqual(
{dummy_reservation['id']: {'resources_changed': True}},
result)
def test_heal_active_reservations_and_missing_resources(self):
failed_host = {'id': '1'}
dummy_reservation = {
'id': 'rsrv-1',
'resource_type': plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
'status': 'active',
'hypervisor_properties': [],
'resource_properties': [],
'resource_id': 'resource-1',
'computehost_allocations': [{
'id': 'alloc-1', 'compute_host_id': failed_host['id'],
'reservation_id': 'rsrv-1'
}]
}
get_reservations = self.patch(self.db_utils,
'get_reservations_by_host_ids')
get_reservations.return_value = [dummy_reservation]
reallocate = self.patch(self.fake_phys_plugin, '_reallocate')
reallocate.return_value = False
result = self.fake_phys_plugin.heal_reservations(
[failed_host],
datetime.datetime(2020, 1, 1, 12, 00),
datetime.datetime(2020, 1, 1, 13, 00))
reallocate.assert_called_once_with(
dummy_reservation['computehost_allocations'][0])
self.assertEqual(
{dummy_reservation['id']: {'missing_resources': True}},
result)
def test_reallocate_before_start(self):
failed_host = {'id': '1'}
new_host = {'id': '2'}
dummy_allocation = {
'id': 'alloc-1',
'compute_host_id': failed_host['id'],
'reservation_id': 'rsrv-1'
}
dummy_reservation = {
'id': 'rsrv-1',
'resource_type': plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
'status': 'pending',
'hypervisor_properties': [],
'resource_properties': [],
'resource_id': 'resource-1'
}
dummy_host_reservation = {
'aggregate_id': 1
}
dummy_lease = {
'name': 'lease-name',
'start_date': datetime.datetime(2020, 1, 1, 12, 00),
'end_date': datetime.datetime(2020, 1, 2, 12, 00),
'trust_id': 'trust-1'}
'trust_id': 'trust-1'
}
reservation_get = self.patch(self.db_api, 'reservation_get')
reservation_get.return_value = dummy_reservation
host_reservation_get = self.patch(self.db_api, 'host_reservation_get')
host_reservation_get.return_value = dummy_host_reservation
lease_get = self.patch(self.db_api, 'lease_get')
lease_get.return_value = dummy_lease
matching_hosts = self.patch(host_plugin.PhysicalHostPlugin,
'_matching_hosts')
-        matching_hosts.return_value = [new_hostid]
+        matching_hosts.return_value = [new_host['id']]
alloc_update = self.patch(self.db_api, 'host_allocation_update')
with mock.patch.object(datetime, 'datetime',
mock.Mock(wraps=datetime.datetime)) as patched:
-            patched.utcnow.return_value = datetime.datetime(2020, 1, 1,
-                                                            13, 00)
-            result = self.fake_phys_plugin.heal_reservations(failed_hosts)
-        alloc_destroy.assert_not_called()
-        matching_hosts.assert_called_once_with(
-            [], [], '1-1',
-            datetime.datetime(2020, 1, 1, 13, 00),
-            datetime.datetime(2020, 1, 2, 12, 00))
-        alloc_update.assert_called_once_with('alloc-1',
-                                             {'compute_host_id': new_hostid})
-        self.add_compute_host.assert_called_once_with(1, 'compute')
-        self.assertEqual({'rsrv-1': {'resources_changed': True}}, result)
+            patched.utcnow.return_value = datetime.datetime(
+                2020, 1, 1, 11, 00)
+            result = self.fake_phys_plugin._reallocate(dummy_allocation)
-    def test_heal_active_reservations_and_missing_resources(self):
-        failed_hosts = [{'id': '1'}]
-        alloc_get = self.patch(self.db_api,
-                               'host_allocation_get_all_by_values')
-        alloc_get.return_value = [{'id': 'alloc-1',
-                                   'compute_host_id': '1',
-                                   'reservation_id': 'rsrv-1'}]
-        alloc_destroy = self.patch(self.db_api, 'host_allocation_destroy')
-        reservation_get = self.patch(self.db_api, 'reservation_get')
-        reservation_get.return_value = {'id': 'rsrv-1',
-                                        'resource_type': plugin.RESOURCE_TYPE,
-                                        'lease_id': 'lease-1',
-                                        'status': 'pending',
-                                        'hypervisor_properties': [],
-                                        'resource_properties': [],
-                                        'resource_id': 'resource-1'}
-        host_get = self.patch(self.db_api, 'host_get')
-        host_get.return_value = {'service_name': 'compute'}
-        host_reservation_get = self.patch(self.db_api, 'host_reservation_get')
-        host_reservation_get.return_value = {'aggregate_id': 1}
-        lease_get = self.patch(self.db_api, 'lease_get')
-        lease_get.return_value = {
+        matching_hosts.assert_called_once_with(
+            dummy_reservation['hypervisor_properties'],
+            dummy_reservation['resource_properties'],
+            '1-1', dummy_lease['start_date'], dummy_lease['end_date'])
+        alloc_update.assert_called_once_with(
+            dummy_allocation['id'],
+            {'compute_host_id': new_host['id']})
+        self.assertEqual(True, result)
def test_reallocate_active(self):
failed_host = {'id': '1',
'service_name': 'compute-1'}
new_host = {'id': '2',
'service_name': 'compute-2'}
dummy_allocation = {
'id': 'alloc-1',
'compute_host_id': failed_host['id'],
'reservation_id': 'rsrv-1'
}
dummy_reservation = {
'id': 'rsrv-1',
'resource_type': plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
'status': 'active',
'hypervisor_properties': [],
'resource_properties': [],
'resource_id': 'resource-1'
}
dummy_host_reservation = {
'aggregate_id': 1
}
dummy_lease = {
'name': 'lease-name',
'start_date': datetime.datetime(2020, 1, 1, 12, 00),
'end_date': datetime.datetime(2020, 1, 2, 12, 00),
'trust_id': 'trust-1'}
'trust_id': 'trust-1'
}
reservation_get = self.patch(self.db_api, 'reservation_get')
reservation_get.return_value = dummy_reservation
lease_get = self.patch(self.db_api, 'lease_get')
lease_get.return_value = dummy_lease
host_reservation_get = self.patch(self.db_api, 'host_reservation_get')
host_reservation_get.return_value = dummy_host_reservation
host_get = self.patch(self.db_api, 'host_get')
host_get.side_effect = [failed_host, new_host]
matching_hosts = self.patch(host_plugin.PhysicalHostPlugin,
'_matching_hosts')
matching_hosts.return_value = [new_host['id']]
alloc_update = self.patch(self.db_api, 'host_allocation_update')
with mock.patch.object(datetime, 'datetime',
mock.Mock(wraps=datetime.datetime)) as patched:
patched.utcnow.return_value = datetime.datetime(
2020, 1, 1, 13, 00)
result = self.fake_phys_plugin._reallocate(dummy_allocation)
self.remove_compute_host.assert_called_once_with(
dummy_host_reservation['aggregate_id'],
failed_host['service_name'])
matching_hosts.assert_called_once_with(
dummy_reservation['hypervisor_properties'],
dummy_reservation['resource_properties'],
'1-1', datetime.datetime(2020, 1, 1, 13, 00),
dummy_lease['end_date'])
alloc_update.assert_called_once_with(
dummy_allocation['id'],
{'compute_host_id': new_host['id']})
self.add_compute_host(
dummy_host_reservation['aggregate_id'],
new_host['service_name'])
self.assertEqual(True, result)
def test_reallocate_missing_resources(self):
failed_host = {'id': '1'}
dummy_allocation = {
'id': 'alloc-1',
'compute_host_id': failed_host['id'],
'reservation_id': 'rsrv-1'
}
dummy_reservation = {
'id': 'rsrv-1',
'resource_type': plugin.RESOURCE_TYPE,
'lease_id': 'lease-1',
'status': 'pending',
'hypervisor_properties': [],
'resource_properties': [],
'resource_id': 'resource-1'
}
dummy_host_reservation = {
'aggregate_id': 1
}
dummy_lease = {
'name': 'lease-name',
'start_date': datetime.datetime(2020, 1, 1, 12, 00),
'end_date': datetime.datetime(2020, 1, 2, 12, 00),
'trust_id': 'trust-1'
}
reservation_get = self.patch(self.db_api, 'reservation_get')
reservation_get.return_value = dummy_reservation
host_reservation_get = self.patch(self.db_api, 'host_reservation_get')
host_reservation_get.return_value = dummy_host_reservation
lease_get = self.patch(self.db_api, 'lease_get')
lease_get.return_value = dummy_lease
matching_hosts = self.patch(host_plugin.PhysicalHostPlugin,
'_matching_hosts')
matching_hosts.return_value = []
alloc_update = self.patch(self.db_api, 'host_allocation_update')
alloc_destroy = self.patch(self.db_api, 'host_allocation_destroy')
with mock.patch.object(datetime, 'datetime',
mock.Mock(wraps=datetime.datetime)) as patched:
-            patched.utcnow.return_value = datetime.datetime(2020, 1, 1,
-                                                            13, 00)
-            result = self.fake_phys_plugin.heal_reservations(failed_hosts)
-        alloc_destroy.assert_called_once_with('alloc-1')
+            patched.utcnow.return_value = datetime.datetime(
+                2020, 1, 1, 11, 00)
+            result = self.fake_phys_plugin._reallocate(dummy_allocation)
         matching_hosts.assert_called_once_with(
-            [], [], '1-1',
-            datetime.datetime(2020, 1, 1, 13, 00),
-            datetime.datetime(2020, 1, 2, 12, 00))
-        alloc_update.assert_not_called()
-        self.assertEqual({'rsrv-1': {'missing_resources': True}}, result)
+            dummy_reservation['hypervisor_properties'],
+            dummy_reservation['resource_properties'],
+            '1-1', dummy_lease['start_date'], dummy_lease['end_date'])
+        alloc_destroy.assert_called_once_with(dummy_allocation['id'])
+        self.assertEqual(False, result)
def test_matching_hosts_not_allocated_hosts(self):
def host_allocation_get_all_by_values(**kwargs):
@ -1865,19 +1970,43 @@ class PhysicalHostMonitorPluginTestCase(tests.TestCase):
self.assertEqual(([], hosts), result)
def test_handle_failures(self):
failed_hosts = [
{'id': '1',
'hypervisor_hostname': 'compute-1'}
]
host_update = self.patch(db_api, 'host_update')
heal = self.patch(self.host_monitor_plugin, 'heal')
self.host_monitor_plugin._handle_failures(failed_hosts)
host_update.assert_called_once_with(failed_hosts[0]['id'],
{'reservable': False})
heal.assert_called_once()
def test_heal(self):
failed_hosts = [
{'id': '1',
'hypervisor_hostname': 'compute-1'}
]
reservation_flags = {
'rsrv-1': {'missing_resources': True}
}
hosts_get = self.patch(db_api, 'unreservable_host_get_all_by_queries')
hosts_get.return_value = failed_hosts
get_healing_interval = self.patch(self.host_monitor_plugin,
'get_healing_interval')
get_healing_interval.return_value = 60
healing_handler = mock.Mock()
healing_handler.return_value = reservation_flags
self.host_monitor_plugin.healing_handlers = [healing_handler]
start_date = datetime.datetime(2020, 1, 1, 12, 00)
with mock.patch.object(datetime, 'datetime',
mock.Mock(wraps=datetime.datetime)) as patched:
patched.utcnow.return_value = start_date
result = self.host_monitor_plugin.heal()
healing_handler.assert_called_once_with(
failed_hosts, start_date,
start_date + datetime.timedelta(minutes=60)
)
self.assertEqual(reservation_flags, result)

View File

@ -379,12 +379,10 @@ class ReservationPoolTestCase(tests.TestCase):
def test_remove_computehosts_with_duplicate_host(self):
self._patch_get_aggregate_from_name_or_id()
add_host = self.nova.aggregates.add_host
self.pool.remove_computehost('pool', 'host3')
add_host.assert_not_called()
def test_get_computehosts_with_correct_pool(self):
self._patch_get_aggregate_from_name_or_id()

View File

@ -397,7 +397,7 @@ class ReservationPool(NovaClientWrapper):
self.nova.aggregates.remove_host(agg.id, host)
except nova_exception.ClientException:
hosts_failing_to_remove.append(host)
if freepool_agg.id != agg.id and host not in freepool_agg.hosts:
# NOTE(sbauza) : We don't want to put again the host in
# freepool if the requested pool is the freepool...
try:

Binary file not shown.


View File

@ -0,0 +1,8 @@
Image sources
=============
Images are drawn with `draw.io`_. To edit an image, open `draw.io`_,
select *Open Existing Diagram* and choose the *\*.xml* file under this
directory.

.. _draw.io: https://www.draw.io/

View File

@ -0,0 +1 @@
<mxfile editor="www.draw.io" type="device" version="8.0.6"><diagram id="d8c8e69b-b50c-3f6a-fb23-61fa2f2252fc" name="Page-1">7ZtLc+I4EIB/yx44LoVfgI9JJpk5zFalNlv7OG0pWBhNZIuSRYD99SthyQ/JBpPYmExMqlJ2W5ZEf91StyRGzl20+0rBevUbCSAe2ZNgN3K+jGx77tj8vxDslcBKBSFFQSoqCJ7Qf1AKJ1K6QQFMSgUZIZihdVm4IHEMF6wkA5SSbbnYkuByq2sQQkPwtADYlP6FArZKpbPJJJd/gyhcyZYd9eAZLF5CSjaxbG5kO8vDJ30cAVWVLJ+sQEC2BZFzP3LuKCEsvYp2dxALzSqtpe891DzNuk1hzJq84KQvvAK8kd/8DxRB2Te2V+pgcMeru12xCHOBxS8TRskLvCOYUC6JScxL3i4RxpoIYBTG/HbBOwS5/PYVUoa4om/kgwgFgWjmdrtCDD6twUK0ueVWxWUHTULR14monsRMmoply3vV3EgwEB8uN5Ug9SLahruCSCrlKyQRZHTPi6inCqi0X2sq77e5NbhStCoYguIKpP2FWc05BH4hOVQzmRpMfocJ2VCuGHtyY7DJdWTVaLEWW1FpRXa51TZXugE6xCBJJLkWiLgaEMsEMqsA4rQAZFYFBNJXwBCJf7WOEJm0SOR+Kv7eQ6QNv7BPY8h8p20Oc0PTMOCjtrwllK1ISGKA73Pp7WJDXzPnKIMpQIA7xP6WYnH9j7gezzxxG/NeZs/ETeHhD8jYXrIAG0a4KO/Gd0LWsv6zRyrp78XxgAEaQlYSia9/lBqFmBvpa3lSew8ChbvGF+zP5At2E1+omiTa8AXLOgrC+Uwg3D4HJcs2QDxQaIZQ3ej/4fDpWf/O9LT+p12p34xgP5v6vQbm73WlftdQ/8ieYpEsIH4RHrIGiFEcipfEd+YlVQleeVbInNrj4EYkckJZIpJEiwMYQJkpLqA7e6Y9OYdKrdrNlKpkjWde2eAj4dopxFmaS7kaqzQ+kC8V8zutHnt+vJ40qDDq4SoG+0KxtSiQVLekypDlMoGlWg7Gk2mymT2ZsfYxy5BJZgCSVRbkdWoMF6Lv+SVqb8avV9QQ/1vAzQdwprtd2m/fAE6Na58bnDtpCZxeUXfgXDPyUX0xlyQ+1gLeEqP1N/UOv/5T9rnWis5ZwHA10M54UvhYxixfFblaLYROrlfLz/TAgZ96OptfCz+/lp+ZfQ/81MTm6QNtX/w8M9RU/NyBX22uP7sWflPT/wxs52X9ai9uou1enETdyn5JC3A8fSFGBSMFHPaRIOpdi/NmOGLg+Nw5/amYvuMQs7ecfm4GOscs4yfNMPRVOtvxx37x8zZj0KvtLt+Ym/vSg4Of5ZgdM+3PwYdFu4OttbX2o1U0786lh0U7Lna6GZn1ajvEWBUJG5swYNiFea9L6wg7NoXeBvQs88gN6gtk4pylUGl2Fm0JEN5Q+IthNTyTWotLnuVismGnU681pIj3T+RB6qXHXKRlwRfcnV2TBInzFLbsZ5JWIgwmeyZ6NXZ9MTAiynWUirYwYccM/owVE6tsPNbM9IHKYyZtZHV+g90BtaDxHTxD/Jhr5ZkwRqKKFQ9GKvfb1UFbO83FDxYU7UJxvnj8DPgYM+a1vJiLIle4JW9pzOwKZlWJeBvHR33TeUfOjfhG3KNIwIdq0aacDTSUwzKXeuroAD3T6aqOFLWxsuXX7+x4A7I6ZK5aZ+h9ZdI/b8HjQhHUhY4u6IeRppMqCmenxPrK5rSzaLri/H0h+FkQbpH7IeLpMuJxddhVEU/V4NtGxGM3yImHiMecMLV9vez+AhFPxQ80ekQWk3+TffRM8Mfg5vfHbXZV3D6Mq9mzHpE12HQdkJm75j162bwiHxyQnUTm6csuF0TmX9XAmEQIw70ZlNYdl7hGmq7+89HuaPLb/Pfbaf6R/0Teuf8f</diagram></mxfile>

View File

@ -1,8 +0,0 @@
Status graphs
=============
Status graphs are drawn by `draw.io`_ . To edit graphs, open `draw.io`_,
select *Open Existing Diagram* and chose the *\*_statuses.xml* file under this
directory.
.. _draw.io: <https://www.draw.io/>

View File

@ -30,11 +30,13 @@ Reservation Healing
If a host failure is detected, Blazar tries to heal host/instance reservations
which use the failed host by reserving an alternative host.
The length of the *healing interval* can be configured by the
*healing_interval* option.
Configurations
==============
To enable the compute host monitor, enable the *enable_notification_monitor*
or the *enable_polling_monitor* option, and set *healing_interval* as
appropriate for your cloud.
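For example, a configuration along the following lines enables the polling
monitor with a one-hour healing interval. The section name and values are
shown for illustration only; check the configuration reference linked below
for the authoritative option list:

.. code-block:: ini

   [physical:host]
   enable_polling_monitor = True
   healing_interval = 60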
See also the :doc:`../configuration/blazar-conf` for details.

View File

@ -6,7 +6,7 @@ Blazar monitors states of resources and heals reservations which are expected
to suffer from resource failure.
Resource specific functionality, e.g., calling Nova APIs, is provided as a
monitoring plugin.
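As a rough sketch (hypothetical class and method names, not Blazar's actual
code), a monitoring plugin reduces to an interface like the following, which
the resource-agnostic monitor core drives:

.. code-block:: python

   class FakeMonitorPlugin(object):
       """Hypothetical monitoring plugin for one resource type."""

       def is_notification_enabled(self):
           """Whether failures are detected from notifications."""
           return True

       def is_polling_enabled(self):
           """Whether failures are detected by periodic polling."""
           return False

       def poll(self):
           """Check resource states and return detected failures."""
           return []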
The following sections describe the resource monitoring feature in detail.
Monitoring Type
===============
@ -34,7 +34,48 @@ Healing
=======
When the monitor detects a resource failure, it heals reservations which
are expected to suffer from the failure. Note that it does not immediately
heal all reservations for the failed resource, because the resource is
expected to recover sometime in the future; the monitor heals only
reservations which are active or will start soon.
In addition, the monitor periodically checks the validity of reservations and
heals invalid ones.
Therefore, even if the failed resource has not recovered by the end of an
interval, the periodic task heals the invalid reservations which will start
in the next interval.
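The following minimal Python sketch summarizes the idea. It is not Blazar's
actual implementation, and the helper names (``get_failed_resources``,
``find_reservations``, ``reallocate``) are placeholders for the
plugin-specific lookups:

.. code-block:: python

   import datetime

   HEALING_INTERVAL_MINUTES = 60  # illustrative value


   def periodic_heal(get_failed_resources, find_reservations, reallocate):
       """Heal reservations that are active or start within one interval."""
       start = datetime.datetime.utcnow()
       end = start + datetime.timedelta(minutes=HEALING_INTERVAL_MINUTES)
       flags = {}
       # Only reservations overlapping [start, end] are considered; later
       # ones are left to a future periodic run, by which time the
       # resource may have recovered.
       for reservation in find_reservations(get_failed_resources(),
                                            start, end):
           if not reallocate(reservation):
               flags[reservation['id']] = {'missing_resources': True}
       return flags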
The healing flow is as follows (a toy code walk-through follows the
diagram):

1. Resource A is reserved for *Reservation-1*, *Reservation-2* and
   *Reservation-3* as shown in the following diagram.
2. At point 1, the periodic task in the manager checks whether there is any
   reservation to heal, and finds none.
3. At point 2, the manager detects a failure of resource A. It then heals
   active reservations and reservations which will start within the *healing
   interval*. In this case, *Reservation-1* and *Reservation-2* are healed
   immediately.
4. At point 3, the periodic task checks whether there is any reservation to
   heal. It finds none: the resource has not yet recovered, but no
   reservation will start in the next interval, and *Reservation-2* has
   already been healed in step 3.
5. At point 4, the periodic task checks again. This time it finds that
   *Reservation-3* needs to be healed, because it will start in the next
   interval and the resource has not yet recovered.
6. Before point 5, the manager detects that the resource has recovered.
7. At point 5, the periodic task finds no failed resources and nothing to
   heal.
.. image:: ../images/healing_flow.png
   :align: center
   :width: 600 px
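The following toy walk-through mirrors the numbered steps above. The times,
the 60-minute interval and the hourly runs are hypothetical, and reservation
end times are ignored for brevity:

.. code-block:: python

   import datetime

   INTERVAL = datetime.timedelta(minutes=60)
   FAILURE_AT = datetime.datetime(2020, 1, 1, 12, 30)   # point 2
   RECOVERY_AT = datetime.datetime(2020, 1, 1, 15, 30)  # before point 5
   STARTS = {
       'Reservation-1': datetime.datetime(2020, 1, 1, 12, 0),
       'Reservation-2': datetime.datetime(2020, 1, 1, 13, 0),
       'Reservation-3': datetime.datetime(2020, 1, 1, 16, 0),
   }
   healed = set()


   def run(now):
       """One healing pass: heal reservations starting by now + INTERVAL."""
       if not FAILURE_AT <= now < RECOVERY_AT:
           return []  # no failed resource, nothing to do
       due = [name for name, start in STARTS.items()
              if name not in healed and start <= now + INTERVAL]
       healed.update(due)
       return due

   print(run(FAILURE_AT))                            # step 3: R-1 and R-2
   print(run(datetime.datetime(2020, 1, 1, 13, 0)))  # step 4 (point 3): []
   print(run(datetime.datetime(2020, 1, 1, 15, 0)))  # step 5 (point 4): R-3
   print(run(datetime.datetime(2020, 1, 1, 16, 0)))  # step 7 (point 5): []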
Flags
=====