Refactor evacuators

Allow the evacuators to do more and to freely evacuate different types
of workloads (full compute nodes, VMs, ...).

Change-Id: I2e8b23a48504b8e4ea13f6b86cb9689d9bab5cf1
Depends-On: I5272ca90d806b8ce83055199724abdc14fe414bc
This commit is contained in:
Saad Zaher 2017-07-13 20:33:48 +01:00
parent 4d11ee77d3
commit b5af64bd82
6 changed files with 105 additions and 138 deletions

View File

@ -16,12 +16,6 @@
# Note: This option can be changed without restarting. # Note: This option can be changed without restarting.
#debug = false #debug = false
# DEPRECATED: If set to false, the logging level will be set to WARNING instead
# of the default INFO level. (boolean value)
# This option is deprecated for removal.
# Its value may be silently ignored in the future.
#verbose = true
# The name of a logging configuration file. This file is appended to any # The name of a logging configuration file. This file is appended to any
# existing logging configuration files. For details about logging configuration # existing logging configuration files. For details about logging configuration
# files, see the Python logging module documentation. Note that when logging # files, see the Python logging module documentation. Note that when logging
@ -60,6 +54,12 @@
# is set. (boolean value) # is set. (boolean value)
#use_syslog = false #use_syslog = false
# Enable journald for logging. If running in a systemd environment you may wish
# to enable journal support. Doing so will use the journal native protocol
# which includes structured metadata in addition to log messages. This option is
# ignored if log_config_append is set. (boolean value)
#use_journal = false
# Syslog facility to receive log lines. This option is ignored if # Syslog facility to receive log lines. This option is ignored if
# log_config_append is set. (string value) # log_config_append is set. (string value)
#syslog_log_facility = LOG_USER #syslog_log_facility = LOG_USER
@ -88,7 +88,7 @@
# List of package logging levels in logger=LEVEL pairs. This option is ignored # List of package logging levels in logger=LEVEL pairs. This option is ignored
# if log_config_append is set. (list value) # if log_config_append is set. (list value)
#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO #default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,oslo_messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO
# Enables or disables publication of error events. (boolean value) # Enables or disables publication of error events. (boolean value)
#publish_errors = false #publish_errors = false

View File

@ -22,72 +22,30 @@ class EvacuatorBaseDriver(object):
a unified interface a unified interface
""" """
def __init__(self, wait, retries, shared_storage, **kwargs): def __init__(self, nodes, evacuator_conf, fencer):
""" """
Initialize Evacuation driver with the config args Initialize Evacuation driver with the config args
:param wait: time in seconds that the evcauator should wait before :param nodes: A list of nodes to be evacuated!
retrying to disable the node :param evacuator_conf: A dict of arguments that got loaded from the
:param retries: Number of times the evacuator will try to disable the configuration file!
compute node
:param shared_storage: Boolean; True if the compute nodes are running
on shared storage and False otherwise
:param kwargs: Dict of arguments that any future driver may need to
load it from the config file
:return: None :return: None
""" """
self.wait = wait self.nodes = nodes
self.retries = retries self.evacuator_conf = evacuator_conf
self.shared_storage = shared_storage self.fencer = fencer
self.options = kwargs
@abc.abstractmethod @abc.abstractmethod
def disable_node(self, node): def evacuate(self, enable_fencing=True):
""" """Evacuate the infected node.
Disable the compute node from accepting any new VMs or requests :return: Two lists; the first one will be the succeeded nodes and the
:param node: dict contains node's hostname other is the failed nodes
:return: True pr False
""" """
pass pass
@abc.abstractmethod @abc.abstractmethod
def is_node_disabled(self, node):
"""
Check if node is already disabled or not
:param node: dict contains node's hostname
:return: True or False
"""
pass
@abc.abstractmethod
def evacuate_nodes(self, nodes):
"""
Will evacuate all running VMs on the required nodes
:param nodes: list of nodes
:return: list of nodes with updated status
"""
pass
@abc.abstractmethod
def get_node_instances(self, node):
"""
List instances on a compute host
:param node: dict contains node's hostname
:return: List contains running VMs on a given node
"""
pass
def get_info(self): def get_info(self):
""" """
Get Driver Information Get Driver Information
:return: Dict contains driver information :return: Dict contains driver information
""" """
pass pass
@abc.abstractmethod
def get_node_status(self, node):
"""
Check the node status and report it
:param node: dict contains node's hostname
:return: dict with key 'status': 'True or False'
"""
pass

View File

@ -25,71 +25,19 @@ LOG = log.getLogger(__name__)
class EvacuationManager(object): class EvacuationManager(object):
def __init__(self, enable_fencing=True): def __init__(self, enable_fencing=True):
evcuation_conf = CONF.get('evacuation')
self.driver = importutils.import_object(
evcuation_conf.get('driver'),
evcuation_conf.get('wait'),
evcuation_conf.get('retries'),
evcuation_conf.get('shared_storage'),
**evcuation_conf.get('options')
)
self.enable_fencing = enable_fencing self.enable_fencing = enable_fencing
self.wait = evcuation_conf.get('wait')
self.retires = evcuation_conf.get('retries', 1)
if self.retires <= 0:
self.retires = 1
def evacuate(self, nodes): def evacuate(self, nodes):
# try to disable node fencer = FencerManager(nodes)
# @todo needs more error handling like if the status didn't update or evacuation_conf = CONF.get('evacuation')
# we are unable to disable the node ??? driver = importutils.import_object(
failed_nodes = [] # maintain nodes that are going to fail at any state evacuation_conf['driver'],
succeeded_nodes = [] nodes,
for node in nodes: evacuation_conf,
for i in range(0, self.retires): fencer
status = self._disable_node(node) )
# if True ( node disabled ) break the loop
if status:
break
else:
status = False
node['status'] = status
# make sure the disable request was successful
if not self.driver.get_node_status(node):
failed_nodes.append(node)
nodes.remove(node) # if the node failed at any step no reason
# to move it to the next step
else:
succeeded_nodes.append(node)
nodes = succeeded_nodes return driver.evacuate(self.enable_fencing)
if self.enable_fencing:
fencer = FencerManager(nodes)
nodes = fencer.fence()
"""
@todo this code needs to be commented for the time being till we fix
nova bug found in state, which always go up afer enable or disable. We
will use get_node_details for the time being from the main script to
get nodes details before evacuating ...
succeeded_nodes = []
for node in nodes:
node['instances'] = self.driver.get_node_instances(node)
succeeded_nodes.append(node)
nodes = succeeded_nodes
"""
# Start evacuation calls ...
evacuated_nodes = []
for i in range(0, self.retires):
try:
sleep(self.wait)
nodes = self.driver.evacuate_nodes(nodes)
if not nodes:
return evacuated_nodes
evacuated_nodes = nodes
except Exception as e:
LOG.error(e)
return evacuated_nodes
def get_nodes_details(self, nodes): def get_nodes_details(self, nodes):
""" """
@ -98,9 +46,3 @@ class EvacuationManager(object):
:return: list of node with more details :return: list of node with more details
""" """
return get_nodes_details(nodes) return get_nodes_details(nodes)
def _disable_node(self, node):
if not self.driver.is_node_disabled(node):
return self.driver.disable_node(node)
else:
True

View File

@ -15,16 +15,82 @@ from oslo_config import cfg
from oslo_log import log from oslo_log import log
from freezer_dr.evacuators.common.driver import EvacuatorBaseDriver from freezer_dr.evacuators.common.driver import EvacuatorBaseDriver
from freezer_dr.common.utils import get_os_client from freezer_dr.common.utils import get_os_client
import time
CONF = cfg.CONF CONF = cfg.CONF
LOG = log.getLogger(__name__) LOG = log.getLogger(__name__)
class StandardEvacuator(EvacuatorBaseDriver): class StandardEvacuator(EvacuatorBaseDriver):
def __init__(self, wait, retires, shared_storage, **kwargs): def __init__(self, nodes, evacuator_conf, fencer):
super(StandardEvacuator, self).__init__(wait, retires, shared_storage, super(StandardEvacuator, self).__init__(nodes, evacuator_conf, fencer)
**kwargs) # initialize the OS client!
self.client = get_os_client() self.client = get_os_client()
self.wait = evacuator_conf.get('wait')
self.retires = evacuator_conf.get('retries', 1)
if self.retires <= 0:
self.retires = 1
def _disable_node(self, node):
    """Disable ``node`` unless it is already disabled.

    :param node: node record handed unchanged to ``is_node_disabled`` and
        ``disable_node``
    :return: True when the node is already disabled; otherwise whatever
        ``disable_node`` returns
    """
    if self.is_node_disabled(node):
        # Already disabled counts as success. The previous code evaluated a
        # bare ``True`` expression here and therefore returned None, which
        # callers treated as a failed disable attempt.
        return True
    return self.disable_node(node)
def evacuate(self, enable_fencing=True):
    """Disable, optionally fence, and then evacuate ``self.nodes``.

    Each node is disabled (up to ``self.retires`` attempts) and the outcome
    is stored in ``node['status']``. Nodes for which ``get_node_status``
    returns a falsy value are recorded as failed and removed from
    ``self.nodes``; the remaining nodes continue to fencing/evacuation.

    :param enable_fencing: when True, the nodes that passed the disable
        step are handed to ``self.fencer`` and evacuation continues with
        the list returned by ``self.fencer.fence()``
    :return: tuple ``(evacuated_nodes, failed_nodes)``
    """
    # TODO: needs more error handling, e.g. when the status did not update
    # or the node could not be disabled.
    failed_nodes = []  # nodes that failed the disable step
    succeeded_nodes = []
    # Iterate over a snapshot: failed nodes are removed from self.nodes
    # below, and removing from the list being iterated would silently skip
    # the node that follows every failed one.
    for node in list(self.nodes):
        status = False
        for _ in range(self.retires):
            status = self._disable_node(node)
            if status:
                # node disabled, no need to retry
                break
            status = False
        node['status'] = status
        # make sure the disable request was successful
        if not self.get_node_status(node):
            # a node that failed here must not move on to the next step
            failed_nodes.append(node)
            self.nodes.remove(node)
        else:
            succeeded_nodes.append(node)

    nodes = succeeded_nodes
    if enable_fencing:
        self.fencer.update_nodes(nodes)
        nodes = self.fencer.fence()

    # TODO: collecting the instances of every node at this point is
    # disabled until the nova bug is fixed where the state always goes up
    # after enable or disable. For the time being the main script uses
    # get_nodes_details to gather node details before evacuating.

    # Start evacuation calls ...
    evacuated_nodes = []
    for _ in range(self.retires):
        try:
            time.sleep(self.wait)
            nodes = self.evacuate_nodes(nodes)
            if not nodes:
                break
            evacuated_nodes = nodes
        except Exception as e:
            # Best effort: log the failed attempt and move on to the next
            # retry instead of aborting the whole evacuation.
            LOG.error(e)

    return evacuated_nodes, failed_nodes
def get_node_instances(self, node): def get_node_instances(self, node):
return self.client.get_hypervisor_instances(node) return self.client.get_hypervisor_instances(node)
@ -39,7 +105,5 @@ class StandardEvacuator(EvacuatorBaseDriver):
return self.client.get_node_status(node) return self.client.get_node_status(node)
def evacuate_nodes(self, nodes): def evacuate_nodes(self, nodes):
return self.client.evacuate(nodes, shared_storage=self.shared_storage) return self.client.evacuate(
nodes, shared_storage=self.evacuator_conf['shared_storage'])

View File

@ -19,9 +19,8 @@ class DummyEvacuator(EvacuatorBaseDriver):
of Freezer-DR. of Freezer-DR.
""" """
def __init__(self, wait, retires, shared_storage, **kwargs): def __init__(self, nodes, evacuator_conf, fencer):
super(DummyEvacuator, self).__init__(wait, retires, shared_storage, super(DummyEvacuator, self).__init__(nodes, evacuator_conf, fencer)
**kwargs)
def disable_node(self, node): def disable_node(self, node):
return True return True

View File

@ -41,7 +41,11 @@ def main():
# Shutdown the node # Shutdown the node
evac = EvacuationManager() evac = EvacuationManager()
notify_nodes = evac.get_nodes_details(nodes) notify_nodes = evac.get_nodes_details(nodes)
evac.evacuate(nodes) evacuated_nodes, failed_nodes = evac.evacuate(nodes)
LOG.debug("Successfully evacuated nodes {0}".format(evacuated_nodes))
LOG.debug("Failed to evacuate nodes {0}".format(failed_nodes))
notifier.notify(notify_nodes, 'success') notifier.notify(notify_nodes, 'success')
failed_nodes = evac.get_nodes_details(failed_nodes)
notifier.notify(failed_nodes, 'error')
else: else:
print "No nodes reported to be down" print "No nodes reported to be down"