162 lines
6.1 KiB
Python
162 lines
6.1 KiB
Python
# Copyright 2016 NTT DATA
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
|
|
|
|
from eventlet import timeout as etimeout
|
|
|
|
from oslo_log import log as logging
|
|
from oslo_service import loopingcall
|
|
from oslo_utils import strutils
|
|
import taskflow.engines
|
|
from taskflow.patterns import linear_flow
|
|
|
|
import masakari.conf
|
|
from masakari.engine.drivers.taskflow import base
|
|
from masakari import exception
|
|
from masakari.i18n import _
|
|
|
|
|
|
# Logger named after this module.
LOG = logging.getLogger(__name__)

# Shared masakari configuration object; the tasks below read their polling
# interval, timeouts and the instance_failure options from it.
CONF = masakari.conf.CONF

# Name of this recovery action. It is passed as an addon to every task in
# this module and is used to derive the flow name.
ACTION = "instance:recovery"
|
|
|
|
|
|
class StopInstanceTask(base.MasakariTask):
    """Task that stops an instance and waits until it reports 'stopped'."""

    def __init__(self, novaclient):
        """Store the nova client and declare the task's required inputs.

        :param novaclient: client object used for every server operation
            performed by this task.
        """
        super(StopInstanceTask, self).__init__(
            addons=[ACTION], requires=["instance_uuid"])
        self.novaclient = novaclient

    def execute(self, context, instance_uuid):
        """Stop the instance for recovery.

        :param context: request context forwarded to the nova client calls.
        :param instance_uuid: identifier of the instance to stop.
        :raises SkipInstanceRecoveryException: if the
            ``process_all_instances`` option is disabled and the instance
            metadata does not mark it as ``HA_Enabled``.
        :raises InstanceRecoveryFailureException: if the instance does not
            reach the 'stopped' state before the
            ``wait_period_after_power_off`` timeout expires.
        """
        server = self.novaclient.get_server(context, instance_uuid)

        # Recovery is only performed when either every instance is to be
        # processed or this particular instance is flagged HA_Enabled in
        # its metadata. The metadata is only consulted when the global
        # option is off.
        if not (CONF.instance_failure.process_all_instances
                or strutils.bool_from_string(
                    server.metadata.get('HA_Enabled', False))):
            LOG.info("Skipping recovery for instance: %s as it is "
                     "not Ha_Enabled.", instance_uuid)
            raise exception.SkipInstanceRecoveryException()

        current_state = getattr(server, 'OS-EXT-STS:vm_state')

        # A 'resized' instance is first reset to 'active' before the stop
        # request is issued.
        # NOTE(review): presumably nova rejects a stop request while the
        # instance is in 'resized' state - confirm.
        if current_state == 'resized':
            self.novaclient.reset_instance_state(
                context, server.id, 'active')

        if current_state != 'stopped':
            self.novaclient.stop_server(context, server.id)

        def _poll_until_stopped():
            # Re-read the server on every tick; finish the looping call as
            # soon as it reports 'stopped'.
            refreshed = self.novaclient.get_server(context, instance_uuid)
            if getattr(refreshed, 'OS-EXT-STS:vm_state') == 'stopped':
                raise loopingcall.LoopingCallDone()

        poller = loopingcall.FixedIntervalLoopingCall(_poll_until_stopped)

        try:
            # Poll at a fixed interval, bounding the overall wait with a
            # timeout.
            poller.start(interval=CONF.verify_interval)
            etimeout.with_timeout(CONF.wait_period_after_power_off,
                                  poller.wait)
        except etimeout.Timeout:
            msg = _("Failed to stop instance %(instance)s") % {
                'instance': server.id
            }
            raise exception.InstanceRecoveryFailureException(message=msg)
        finally:
            # Always stop polling, whether we finished, timed out or failed.
            poller.stop()
|
|
|
|
|
|
class StartInstanceTask(base.MasakariTask):
    """Task that starts an instance which is currently stopped."""

    def __init__(self, novaclient):
        """Store the nova client and declare the task's required inputs.

        :param novaclient: client object used for every server operation
            performed by this task.
        """
        super(StartInstanceTask, self).__init__(
            addons=[ACTION], requires=["instance_uuid"])
        self.novaclient = novaclient

    def execute(self, context, instance_uuid):
        """Start the instance.

        :param context: request context forwarded to the nova client calls.
        :param instance_uuid: identifier of the instance to start.
        :raises InstanceRecoveryFailureException: if the instance is not in
            the 'stopped' state when this task runs.
        """
        server = self.novaclient.get_server(context, instance_uuid)
        current_state = getattr(server, 'OS-EXT-STS:vm_state')

        # Only a stopped instance may be started; anything else is reported
        # as a recovery failure.
        if current_state != 'stopped':
            msg = _("Invalid state for Instance %(instance)s. Expected state: "
                    "'STOPPED', Actual state: '%(actual_state)s'") % {
                'instance': instance_uuid,
                'actual_state': current_state
            }
            raise exception.InstanceRecoveryFailureException(message=msg)

        self.novaclient.start_server(context, server.id)
|
|
|
|
|
|
class ConfirmInstanceActiveTask(base.MasakariTask):
    """Task that waits until an instance reports the 'active' state."""

    def __init__(self, novaclient):
        """Store the nova client and declare the task's required inputs.

        :param novaclient: client object used to look up the server.
        """
        super(ConfirmInstanceActiveTask, self).__init__(
            addons=[ACTION], requires=["instance_uuid"])
        self.novaclient = novaclient

    def execute(self, context, instance_uuid):
        """Poll the instance until it becomes active or the wait times out.

        :param context: request context forwarded to the nova client calls.
        :param instance_uuid: identifier of the instance to watch.
        :raises InstanceRecoveryFailureException: if the instance does not
            reach the 'active' state before the
            ``wait_period_after_power_on`` timeout expires.
        """
        def _poll_until_active():
            # Re-read the server on every tick; finish the looping call as
            # soon as it reports 'active'.
            refreshed = self.novaclient.get_server(context, instance_uuid)
            if getattr(refreshed, 'OS-EXT-STS:vm_state') == 'active':
                raise loopingcall.LoopingCallDone()

        poller = loopingcall.FixedIntervalLoopingCall(_poll_until_active)

        try:
            # Poll at a fixed interval, bounding the overall wait with a
            # timeout.
            poller.start(interval=CONF.verify_interval)
            etimeout.with_timeout(CONF.wait_period_after_power_on,
                                  poller.wait)
        except etimeout.Timeout:
            msg = _("Failed to start instance %(instance)s") % {
                'instance': instance_uuid
            }
            raise exception.InstanceRecoveryFailureException(message=msg)
        finally:
            # Always stop polling, whether we finished, timed out or failed.
            poller.stop()
|
|
|
|
|
|
def get_instance_recovery_flow(novaclient, process_what):
    """Build the instance recovery flow and load it into a taskflow engine.

    The linear flow runs these tasks in order:

    1. StopInstanceTask: stop the instance.
    2. StartInstanceTask: start the instance.
    3. ConfirmInstanceActiveTask: confirm the instance is in active state.

    :param novaclient: client object handed to each task.
    :param process_what: passed to ``taskflow.engines.load`` as the engine
        ``store``. Every task requires ``instance_uuid``; presumably
        ``context`` must be supplied here as well - verify against caller.
    :returns: the object returned by ``taskflow.engines.load``.
    """
    task_classes = (
        StopInstanceTask,
        StartInstanceTask,
        ConfirmInstanceActiveTask,
    )

    # Flow name is the action name with ':' replaced, plus an "_engine"
    # suffix.
    recovery_flow = linear_flow.Flow(ACTION.replace(":", "_") + "_engine")

    # Instantiate every task first, then add them in one call so the flow
    # receives them in execution order.
    tasks = [task_cls(novaclient) for task_cls in task_classes]
    recovery_flow.add(*tasks)

    return taskflow.engines.load(recovery_flow, store=process_what)
|