Merge "Refactor evacuators"
This commit is contained in:
commit
56efab7b54
|
@ -16,12 +16,6 @@
|
|||
# Note: This option can be changed without restarting.
|
||||
#debug = false
|
||||
|
||||
# DEPRECATED: If set to false, the logging level will be set to WARNING instead
|
||||
# of the default INFO level. (boolean value)
|
||||
# This option is deprecated for removal.
|
||||
# Its value may be silently ignored in the future.
|
||||
#verbose = true
|
||||
|
||||
# The name of a logging configuration file. This file is appended to any
|
||||
# existing logging configuration files. For details about logging configuration
|
||||
# files, see the Python logging module documentation. Note that when logging
|
||||
|
@ -60,6 +54,12 @@
|
|||
# is set. (boolean value)
|
||||
#use_syslog = false
|
||||
|
||||
# Enable journald for logging. If running in a systemd environment you may wish
|
||||
# to enable journal support. Doing so will use the journal native protocol
|
||||
# which includes structured metadata in addition to log messages. This option is
|
||||
# ignored if log_config_append is set. (boolean value)
|
||||
#use_journal = false
|
||||
|
||||
# Syslog facility to receive log lines. This option is ignored if
|
||||
# log_config_append is set. (string value)
|
||||
#syslog_log_facility = LOG_USER
|
||||
|
@ -88,7 +88,7 @@
|
|||
|
||||
# List of package logging levels in logger=LEVEL pairs. This option is ignored
|
||||
# if log_config_append is set. (list value)
|
||||
#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO
|
||||
#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,oslo_messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO
|
||||
|
||||
# Enables or disables publication of error events. (boolean value)
|
||||
#publish_errors = false
|
||||
|
|
|
@ -22,72 +22,30 @@ class EvacuatorBaseDriver(object):
|
|||
a unified interface
|
||||
"""
|
||||
|
||||
def __init__(self, wait, retries, shared_storage, **kwargs):
|
||||
def __init__(self, nodes, evacuator_conf, fencer):
|
||||
"""
|
||||
Initialize Evacuation driver with the config args
|
||||
:param wait: time in seconds that the evacuator should wait before
|
||||
retrying to disable the node
|
||||
:param retries: Number of times the evacuator will try to disable the
|
||||
compute node
|
||||
:param shared_storage: Boolean; True if the compute nodes are running
|
||||
on shared storage and False otherwise
|
||||
:param kwargs: Dict of arguments that any future driver may need to
|
||||
load it from the config file
|
||||
:param nodes: A list of nodes to be evacuated!
|
||||
:param evacuator_conf: A dict of arguments that got loaded from the
|
||||
configuration file!
|
||||
:return: None
|
||||
"""
|
||||
self.wait = wait
|
||||
self.retries = retries
|
||||
self.shared_storage = shared_storage
|
||||
self.options = kwargs
|
||||
self.nodes = nodes
|
||||
self.evacuator_conf = evacuator_conf
|
||||
self.fencer = fencer
|
||||
|
||||
@abc.abstractmethod
|
||||
def disable_node(self, node):
|
||||
"""
|
||||
Disable the compute node from accepting any new VMs or requests
|
||||
:param node: dict contains node's hostname
|
||||
:return: True or False
|
||||
def evacuate(self, enable_fencing=True):
|
||||
"""Evacuate the infected node.
|
||||
:return: Two lists; the first one will be the succeeded nodes and the
|
||||
other is the failed nodes
|
||||
"""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def is_node_disabled(self, node):
|
||||
"""
|
||||
Check if node is already disabled or not
|
||||
:param node: dict contains node's hostname
|
||||
:return: True or False
|
||||
"""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def evacuate_nodes(self, nodes):
|
||||
"""
|
||||
Will evacuate all running VMs on the required nodes
|
||||
:param nodes: list of nodes
|
||||
:return: list of nodes with updated status
|
||||
"""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_node_instances(self, node):
|
||||
"""
|
||||
List instances on a compute host
|
||||
:param node: dict contains node's hostname
|
||||
:return: List contains running VMs on a given node
|
||||
"""
|
||||
pass
|
||||
|
||||
def get_info(self):
|
||||
"""
|
||||
Get Driver Information
|
||||
:return: Dict contains driver information
|
||||
"""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_node_status(self, node):
|
||||
"""
|
||||
Check the node status and report it
|
||||
:param node: dict contains node's hostname
|
||||
:return: dict with key 'status': 'True or False'
|
||||
"""
|
||||
pass
|
||||
|
|
|
@ -25,71 +25,19 @@ LOG = log.getLogger(__name__)
|
|||
class EvacuationManager(object):
|
||||
|
||||
def __init__(self, enable_fencing=True):
|
||||
evcuation_conf = CONF.get('evacuation')
|
||||
self.driver = importutils.import_object(
|
||||
evcuation_conf.get('driver'),
|
||||
evcuation_conf.get('wait'),
|
||||
evcuation_conf.get('retries'),
|
||||
evcuation_conf.get('shared_storage'),
|
||||
**evcuation_conf.get('options')
|
||||
)
|
||||
self.enable_fencing = enable_fencing
|
||||
self.wait = evcuation_conf.get('wait')
|
||||
self.retires = evcuation_conf.get('retries', 1)
|
||||
if self.retires <= 0:
|
||||
self.retires = 1
|
||||
|
||||
def evacuate(self, nodes):
|
||||
# try to disable node
|
||||
# @todo needs more error handling like if the status didn't update or
|
||||
# we are unable to disable the node ???
|
||||
failed_nodes = [] # maintain nodes that are going to fail at any state
|
||||
succeeded_nodes = []
|
||||
for node in nodes:
|
||||
for i in range(0, self.retires):
|
||||
status = self._disable_node(node)
|
||||
# if True ( node disabled ) break the loop
|
||||
if status:
|
||||
break
|
||||
else:
|
||||
status = False
|
||||
node['status'] = status
|
||||
# make sure the disable request was successful
|
||||
if not self.driver.get_node_status(node):
|
||||
failed_nodes.append(node)
|
||||
nodes.remove(node) # if the node failed at any step no reason
|
||||
# to move it to the next step
|
||||
else:
|
||||
succeeded_nodes.append(node)
|
||||
fencer = FencerManager(nodes)
|
||||
evacuation_conf = CONF.get('evacuation')
|
||||
driver = importutils.import_object(
|
||||
evacuation_conf['driver'],
|
||||
nodes,
|
||||
evacuation_conf,
|
||||
fencer
|
||||
)
|
||||
|
||||
nodes = succeeded_nodes
|
||||
if self.enable_fencing:
|
||||
fencer = FencerManager(nodes)
|
||||
nodes = fencer.fence()
|
||||
"""
|
||||
@todo this code needs to be commented for the time being till we fix
|
||||
nova bug found in state, which always goes up after enable or disable. We
|
||||
will use get_node_details for the time being from the main script to
|
||||
get nodes details before evacuating ...
|
||||
succeeded_nodes = []
|
||||
for node in nodes:
|
||||
node['instances'] = self.driver.get_node_instances(node)
|
||||
succeeded_nodes.append(node)
|
||||
|
||||
nodes = succeeded_nodes
|
||||
"""
|
||||
# Start evacuation calls ...
|
||||
evacuated_nodes = []
|
||||
for i in range(0, self.retires):
|
||||
try:
|
||||
sleep(self.wait)
|
||||
nodes = self.driver.evacuate_nodes(nodes)
|
||||
if not nodes:
|
||||
return evacuated_nodes
|
||||
evacuated_nodes = nodes
|
||||
except Exception as e:
|
||||
LOG.error(e)
|
||||
return evacuated_nodes
|
||||
return driver.evacuate(self.enable_fencing)
|
||||
|
||||
def get_nodes_details(self, nodes):
|
||||
"""
|
||||
|
@ -98,9 +46,3 @@ class EvacuationManager(object):
|
|||
:return: list of node with more details
|
||||
"""
|
||||
return get_nodes_details(nodes)
|
||||
|
||||
def _disable_node(self, node):
|
||||
if not self.driver.is_node_disabled(node):
|
||||
return self.driver.disable_node(node)
|
||||
else:
|
||||
True
|
||||
|
|
|
@ -15,16 +15,82 @@ from oslo_config import cfg
|
|||
from oslo_log import log
|
||||
from freezer_dr.evacuators.common.driver import EvacuatorBaseDriver
|
||||
from freezer_dr.common.utils import get_os_client
|
||||
import time
|
||||
CONF = cfg.CONF
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
class StandardEvacuator(EvacuatorBaseDriver):
|
||||
|
||||
def __init__(self, wait, retires, shared_storage, **kwargs):
|
||||
super(StandardEvacuator, self).__init__(wait, retires, shared_storage,
|
||||
**kwargs)
|
||||
def __init__(self, nodes, evacuator_conf, fencer):
|
||||
super(StandardEvacuator, self).__init__(nodes, evacuator_conf, fencer)
|
||||
# initialize the OS client!
|
||||
self.client = get_os_client()
|
||||
self.wait = evacuator_conf.get('wait')
|
||||
self.retires = evacuator_conf.get('retries', 1)
|
||||
if self.retires <= 0:
|
||||
self.retires = 1
|
||||
|
||||
def _disable_node(self, node):
|
||||
if not self.is_node_disabled(node):
|
||||
return self.disable_node(node)
|
||||
else:
|
||||
True
|
||||
|
||||
def evacuate(self, enable_fencing=True):
|
||||
# try to disable node
|
||||
# @todo needs more error handling like if the status didn't update or
|
||||
# we are unable to disable the node ???
|
||||
failed_nodes = [] # maintain nodes that are going to fail at any state
|
||||
succeeded_nodes = []
|
||||
for node in self.nodes:
|
||||
status = False
|
||||
for i in range(0, self.retires):
|
||||
status = self._disable_node(node)
|
||||
# if True ( node disabled ) break the loop
|
||||
if status:
|
||||
break
|
||||
else:
|
||||
status = False
|
||||
node['status'] = status
|
||||
# make sure the disable request was successful
|
||||
if not self.get_node_status(node):
|
||||
# if the node failed at any step no reason to move it to
|
||||
# the next step
|
||||
failed_nodes.append(node)
|
||||
self.nodes.remove(node) #
|
||||
else:
|
||||
succeeded_nodes.append(node)
|
||||
|
||||
nodes = succeeded_nodes
|
||||
if enable_fencing:
|
||||
self.fencer.update_nodes(nodes)
|
||||
nodes = self.fencer.fence()
|
||||
"""
|
||||
@todo this code needs to be commented for the time being till we fix
|
||||
nova bug found in state, which always goes up after enable or disable. We
|
||||
will use get_node_details for the time being from the main script to
|
||||
get nodes details before evacuating ...
|
||||
succeeded_nodes = []
|
||||
for node in nodes:
|
||||
node['instances'] = self.driver.get_node_instances(node)
|
||||
succeeded_nodes.append(node)
|
||||
|
||||
nodes = succeeded_nodes
|
||||
"""
|
||||
# Start evacuation calls ...
|
||||
evacuated_nodes = []
|
||||
for i in range(0, self.retires):
|
||||
try:
|
||||
time.sleep(self.wait)
|
||||
nodes = self.evacuate_nodes(nodes)
|
||||
if not nodes:
|
||||
break
|
||||
evacuated_nodes = nodes
|
||||
except Exception as e:
|
||||
LOG.error(e)
|
||||
|
||||
return evacuated_nodes, failed_nodes
|
||||
|
||||
def get_node_instances(self, node):
|
||||
return self.client.get_hypervisor_instances(node)
|
||||
|
@ -39,7 +105,5 @@ class StandardEvacuator(EvacuatorBaseDriver):
|
|||
return self.client.get_node_status(node)
|
||||
|
||||
def evacuate_nodes(self, nodes):
|
||||
return self.client.evacuate(nodes, shared_storage=self.shared_storage)
|
||||
|
||||
|
||||
|
||||
return self.client.evacuate(
|
||||
nodes, shared_storage=self.evacuator_conf['shared_storage'])
|
||||
|
|
|
@ -19,9 +19,8 @@ class DummyEvacuator(EvacuatorBaseDriver):
|
|||
of Freezer-DR.
|
||||
"""
|
||||
|
||||
def __init__(self, wait, retires, shared_storage, **kwargs):
|
||||
super(DummyEvacuator, self).__init__(wait, retires, shared_storage,
|
||||
**kwargs)
|
||||
def __init__(self, nodes, evacuator_conf, fencer):
|
||||
super(DummyEvacuator, self).__init__(nodes, evacuator_conf, fencer)
|
||||
|
||||
def disable_node(self, node):
|
||||
return True
|
||||
|
|
|
@ -41,7 +41,11 @@ def main():
|
|||
# Shutdown the node
|
||||
evac = EvacuationManager()
|
||||
notify_nodes = evac.get_nodes_details(nodes)
|
||||
evac.evacuate(nodes)
|
||||
evacuated_nodes, failed_nodes = evac.evacuate(nodes)
|
||||
LOG.debug("Successfully evacuated nodes {0}".format(evacuated_nodes))
|
||||
LOG.debug("Failed to evacuate nodes {0}".format(failed_nodes))
|
||||
notifier.notify(notify_nodes, 'success')
|
||||
failed_nodes = evac.get_nodes_details(failed_nodes)
|
||||
notifier.notify(failed_nodes, 'error')
|
||||
else:
|
||||
print "No nodes reported to be down"
|
||||
|
|
Loading…
Reference in New Issue