Refactor evacuators
Allow the evacuator drivers to do more and to freely evacuate different types of workloads (full compute nodes, VMs, ...). Change-Id: I2e8b23a48504b8e4ea13f6b86cb9689d9bab5cf1 Depends-On: I5272ca90d806b8ce83055199724abdc14fe414bc
This commit is contained in:
parent
4d11ee77d3
commit
b5af64bd82
|
@ -16,12 +16,6 @@
|
||||||
# Note: This option can be changed without restarting.
|
# Note: This option can be changed without restarting.
|
||||||
#debug = false
|
#debug = false
|
||||||
|
|
||||||
# DEPRECATED: If set to false, the logging level will be set to WARNING instead
|
|
||||||
# of the default INFO level. (boolean value)
|
|
||||||
# This option is deprecated for removal.
|
|
||||||
# Its value may be silently ignored in the future.
|
|
||||||
#verbose = true
|
|
||||||
|
|
||||||
# The name of a logging configuration file. This file is appended to any
|
# The name of a logging configuration file. This file is appended to any
|
||||||
# existing logging configuration files. For details about logging configuration
|
# existing logging configuration files. For details about logging configuration
|
||||||
# files, see the Python logging module documentation. Note that when logging
|
# files, see the Python logging module documentation. Note that when logging
|
||||||
|
@ -60,6 +54,12 @@
|
||||||
# is set. (boolean value)
|
# is set. (boolean value)
|
||||||
#use_syslog = false
|
#use_syslog = false
|
||||||
|
|
||||||
|
# Enable journald for logging. If running in a systemd environment you may wish
|
||||||
|
# to enable journal support. Doing so will use the journal native protocol
|
||||||
|
# which includes structured metadata in addition to log messages. This option is
|
||||||
|
# ignored if log_config_append is set. (boolean value)
|
||||||
|
#use_journal = false
|
||||||
|
|
||||||
# Syslog facility to receive log lines. This option is ignored if
|
# Syslog facility to receive log lines. This option is ignored if
|
||||||
# log_config_append is set. (string value)
|
# log_config_append is set. (string value)
|
||||||
#syslog_log_facility = LOG_USER
|
#syslog_log_facility = LOG_USER
|
||||||
|
@ -88,7 +88,7 @@
|
||||||
|
|
||||||
# List of package logging levels in logger=LEVEL pairs. This option is ignored
|
# List of package logging levels in logger=LEVEL pairs. This option is ignored
|
||||||
# if log_config_append is set. (list value)
|
# if log_config_append is set. (list value)
|
||||||
#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO
|
#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,oslo_messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO
|
||||||
|
|
||||||
# Enables or disables publication of error events. (boolean value)
|
# Enables or disables publication of error events. (boolean value)
|
||||||
#publish_errors = false
|
#publish_errors = false
|
||||||
|
|
|
@ -22,72 +22,30 @@ class EvacuatorBaseDriver(object):
|
||||||
a unified interface
|
a unified interface
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, wait, retries, shared_storage, **kwargs):
|
def __init__(self, nodes, evacuator_conf, fencer):
|
||||||
"""
|
"""
|
||||||
Initialize Evacuation driver with the config args
|
Initialize Evacuation driver with the config args
|
||||||
:param wait: time in seconds that the evacuator should wait before
|
:param nodes: A list of nodes to be evacuated!
|
||||||
retrying to disable the node
|
:param evacuator_conf: A dict of arguments that got loaded from the
|
||||||
:param retries: Number of times the evacuator will try to disable the
|
configuration file!
|
||||||
compute node
|
|
||||||
:param shared_storage: Boolean; True if the compute nodes are running
|
|
||||||
on shared storage and False otherwise
|
|
||||||
:param kwargs: Dict of arguments that any future driver may need to
|
|
||||||
load it from the config file
|
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
self.wait = wait
|
self.nodes = nodes
|
||||||
self.retries = retries
|
self.evacuator_conf = evacuator_conf
|
||||||
self.shared_storage = shared_storage
|
self.fencer = fencer
|
||||||
self.options = kwargs
|
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def disable_node(self, node):
|
def evacuate(self, enable_fencing=True):
|
||||||
"""
|
"""Evacuate the infected node.
|
||||||
Disable the compute node from accepting any new VMs or requests
|
:return: Two lists; the first one will be the succeeded nodes and the
|
||||||
:param node: dict contains node's hostname
|
other is the failed nodes
|
||||||
:return: True or False
|
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def is_node_disabled(self, node):
|
|
||||||
"""
|
|
||||||
Check if node is already disabled or not
|
|
||||||
:param node: dict contains node's hostname
|
|
||||||
:return: True or False
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
@abc.abstractmethod
|
|
||||||
def evacuate_nodes(self, nodes):
|
|
||||||
"""
|
|
||||||
Will evacuate all running VMs on the required nodes
|
|
||||||
:param nodes: list of nodes
|
|
||||||
:return: list of nodes with updated status
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
@abc.abstractmethod
|
|
||||||
def get_node_instances(self, node):
|
|
||||||
"""
|
|
||||||
List instances on a compute host
|
|
||||||
:param node: dict contains node's hostname
|
|
||||||
:return: List contains running VMs on a given node
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def get_info(self):
|
def get_info(self):
|
||||||
"""
|
"""
|
||||||
Get Driver Information
|
Get Driver Information
|
||||||
:return: Dict contains driver information
|
:return: Dict contains driver information
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abc.abstractmethod
|
|
||||||
def get_node_status(self, node):
|
|
||||||
"""
|
|
||||||
Check the node status and report it
|
|
||||||
:param node: dict contains node's hostname
|
|
||||||
:return: dict with key 'status': 'True or False'
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
|
@ -25,71 +25,19 @@ LOG = log.getLogger(__name__)
|
||||||
class EvacuationManager(object):
|
class EvacuationManager(object):
|
||||||
|
|
||||||
def __init__(self, enable_fencing=True):
|
def __init__(self, enable_fencing=True):
|
||||||
evcuation_conf = CONF.get('evacuation')
|
|
||||||
self.driver = importutils.import_object(
|
|
||||||
evcuation_conf.get('driver'),
|
|
||||||
evcuation_conf.get('wait'),
|
|
||||||
evcuation_conf.get('retries'),
|
|
||||||
evcuation_conf.get('shared_storage'),
|
|
||||||
**evcuation_conf.get('options')
|
|
||||||
)
|
|
||||||
self.enable_fencing = enable_fencing
|
self.enable_fencing = enable_fencing
|
||||||
self.wait = evcuation_conf.get('wait')
|
|
||||||
self.retires = evcuation_conf.get('retries', 1)
|
|
||||||
if self.retires <= 0:
|
|
||||||
self.retires = 1
|
|
||||||
|
|
||||||
def evacuate(self, nodes):
|
def evacuate(self, nodes):
|
||||||
# try to disable node
|
fencer = FencerManager(nodes)
|
||||||
# @todo needs more error handling like if the status didn't update or
|
evacuation_conf = CONF.get('evacuation')
|
||||||
# we are unable to disable the node ???
|
driver = importutils.import_object(
|
||||||
failed_nodes = [] # maintain nodes that are going to fail at any state
|
evacuation_conf['driver'],
|
||||||
succeeded_nodes = []
|
nodes,
|
||||||
for node in nodes:
|
evacuation_conf,
|
||||||
for i in range(0, self.retires):
|
fencer
|
||||||
status = self._disable_node(node)
|
)
|
||||||
# if True ( node disabled ) break the loop
|
|
||||||
if status:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
status = False
|
|
||||||
node['status'] = status
|
|
||||||
# make sure the disable request was successful
|
|
||||||
if not self.driver.get_node_status(node):
|
|
||||||
failed_nodes.append(node)
|
|
||||||
nodes.remove(node) # if the node failed at any step no reason
|
|
||||||
# to move it to the next step
|
|
||||||
else:
|
|
||||||
succeeded_nodes.append(node)
|
|
||||||
|
|
||||||
nodes = succeeded_nodes
|
return driver.evacuate(self.enable_fencing)
|
||||||
if self.enable_fencing:
|
|
||||||
fencer = FencerManager(nodes)
|
|
||||||
nodes = fencer.fence()
|
|
||||||
"""
|
|
||||||
@todo this code needs to be commented for the time being till we fix
|
|
||||||
nova bug found in state, which always go up afer enable or disable. We
|
|
||||||
will use get_node_details for the time being from the main script to
|
|
||||||
get nodes details before evacuating ...
|
|
||||||
succeeded_nodes = []
|
|
||||||
for node in nodes:
|
|
||||||
node['instances'] = self.driver.get_node_instances(node)
|
|
||||||
succeeded_nodes.append(node)
|
|
||||||
|
|
||||||
nodes = succeeded_nodes
|
|
||||||
"""
|
|
||||||
# Start evacuation calls ...
|
|
||||||
evacuated_nodes = []
|
|
||||||
for i in range(0, self.retires):
|
|
||||||
try:
|
|
||||||
sleep(self.wait)
|
|
||||||
nodes = self.driver.evacuate_nodes(nodes)
|
|
||||||
if not nodes:
|
|
||||||
return evacuated_nodes
|
|
||||||
evacuated_nodes = nodes
|
|
||||||
except Exception as e:
|
|
||||||
LOG.error(e)
|
|
||||||
return evacuated_nodes
|
|
||||||
|
|
||||||
def get_nodes_details(self, nodes):
|
def get_nodes_details(self, nodes):
|
||||||
"""
|
"""
|
||||||
|
@ -98,9 +46,3 @@ class EvacuationManager(object):
|
||||||
:return: list of node with more details
|
:return: list of node with more details
|
||||||
"""
|
"""
|
||||||
return get_nodes_details(nodes)
|
return get_nodes_details(nodes)
|
||||||
|
|
||||||
def _disable_node(self, node):
|
|
||||||
if not self.driver.is_node_disabled(node):
|
|
||||||
return self.driver.disable_node(node)
|
|
||||||
else:
|
|
||||||
True
|
|
||||||
|
|
|
@ -15,16 +15,82 @@ from oslo_config import cfg
|
||||||
from oslo_log import log
|
from oslo_log import log
|
||||||
from freezer_dr.evacuators.common.driver import EvacuatorBaseDriver
|
from freezer_dr.evacuators.common.driver import EvacuatorBaseDriver
|
||||||
from freezer_dr.common.utils import get_os_client
|
from freezer_dr.common.utils import get_os_client
|
||||||
|
import time
|
||||||
CONF = cfg.CONF
|
CONF = cfg.CONF
|
||||||
LOG = log.getLogger(__name__)
|
LOG = log.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class StandardEvacuator(EvacuatorBaseDriver):
|
class StandardEvacuator(EvacuatorBaseDriver):
|
||||||
|
|
||||||
def __init__(self, wait, retires, shared_storage, **kwargs):
|
def __init__(self, nodes, evacuator_conf, fencer):
|
||||||
super(StandardEvacuator, self).__init__(wait, retires, shared_storage,
|
super(StandardEvacuator, self).__init__(nodes, evacuator_conf, fencer)
|
||||||
**kwargs)
|
# initialize the OS client!
|
||||||
self.client = get_os_client()
|
self.client = get_os_client()
|
||||||
|
self.wait = evacuator_conf.get('wait')
|
||||||
|
self.retires = evacuator_conf.get('retries', 1)
|
||||||
|
if self.retires <= 0:
|
||||||
|
self.retires = 1
|
||||||
|
|
||||||
|
def _disable_node(self, node):
|
||||||
|
if not self.is_node_disabled(node):
|
||||||
|
return self.disable_node(node)
|
||||||
|
else:
|
||||||
|
True
|
||||||
|
|
||||||
|
def evacuate(self, enable_fencing=True):
|
||||||
|
# try to disable node
|
||||||
|
# @todo needs more error handling like if the status didn't update or
|
||||||
|
# we are unable to disable the node ???
|
||||||
|
failed_nodes = [] # maintain nodes that are going to fail at any state
|
||||||
|
succeeded_nodes = []
|
||||||
|
for node in self.nodes:
|
||||||
|
status = False
|
||||||
|
for i in range(0, self.retires):
|
||||||
|
status = self._disable_node(node)
|
||||||
|
# if True ( node disabled ) break the loop
|
||||||
|
if status:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
status = False
|
||||||
|
node['status'] = status
|
||||||
|
# make sure the disable request was successful
|
||||||
|
if not self.get_node_status(node):
|
||||||
|
# if the node failed at any step no reason to move it to
|
||||||
|
# the next step
|
||||||
|
failed_nodes.append(node)
|
||||||
|
self.nodes.remove(node) #
|
||||||
|
else:
|
||||||
|
succeeded_nodes.append(node)
|
||||||
|
|
||||||
|
nodes = succeeded_nodes
|
||||||
|
if enable_fencing:
|
||||||
|
self.fencer.update_nodes(nodes)
|
||||||
|
nodes = self.fencer.fence()
|
||||||
|
"""
|
||||||
|
@todo this code needs to be commented for the time being till we fix
|
||||||
|
nova bug found in state, which always go up afer enable or disable. We
|
||||||
|
will use get_node_details for the time being from the main script to
|
||||||
|
get nodes details before evacuating ...
|
||||||
|
succeeded_nodes = []
|
||||||
|
for node in nodes:
|
||||||
|
node['instances'] = self.driver.get_node_instances(node)
|
||||||
|
succeeded_nodes.append(node)
|
||||||
|
|
||||||
|
nodes = succeeded_nodes
|
||||||
|
"""
|
||||||
|
# Start evacuation calls ...
|
||||||
|
evacuated_nodes = []
|
||||||
|
for i in range(0, self.retires):
|
||||||
|
try:
|
||||||
|
time.sleep(self.wait)
|
||||||
|
nodes = self.evacuate_nodes(nodes)
|
||||||
|
if not nodes:
|
||||||
|
break
|
||||||
|
evacuated_nodes = nodes
|
||||||
|
except Exception as e:
|
||||||
|
LOG.error(e)
|
||||||
|
|
||||||
|
return evacuated_nodes, failed_nodes
|
||||||
|
|
||||||
def get_node_instances(self, node):
|
def get_node_instances(self, node):
|
||||||
return self.client.get_hypervisor_instances(node)
|
return self.client.get_hypervisor_instances(node)
|
||||||
|
@ -39,7 +105,5 @@ class StandardEvacuator(EvacuatorBaseDriver):
|
||||||
return self.client.get_node_status(node)
|
return self.client.get_node_status(node)
|
||||||
|
|
||||||
def evacuate_nodes(self, nodes):
|
def evacuate_nodes(self, nodes):
|
||||||
return self.client.evacuate(nodes, shared_storage=self.shared_storage)
|
return self.client.evacuate(
|
||||||
|
nodes, shared_storage=self.evacuator_conf['shared_storage'])
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -19,9 +19,8 @@ class DummyEvacuator(EvacuatorBaseDriver):
|
||||||
of Freezer-DR.
|
of Freezer-DR.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, wait, retires, shared_storage, **kwargs):
|
def __init__(self, nodes, evacuator_conf, fencer):
|
||||||
super(DummyEvacuator, self).__init__(wait, retires, shared_storage,
|
super(DummyEvacuator, self).__init__(nodes, evacuator_conf, fencer)
|
||||||
**kwargs)
|
|
||||||
|
|
||||||
def disable_node(self, node):
|
def disable_node(self, node):
|
||||||
return True
|
return True
|
||||||
|
|
|
@ -41,7 +41,11 @@ def main():
|
||||||
# Shutdown the node
|
# Shutdown the node
|
||||||
evac = EvacuationManager()
|
evac = EvacuationManager()
|
||||||
notify_nodes = evac.get_nodes_details(nodes)
|
notify_nodes = evac.get_nodes_details(nodes)
|
||||||
evac.evacuate(nodes)
|
evacuated_nodes, failed_nodes = evac.evacuate(nodes)
|
||||||
|
LOG.debug("Successfully evacuated nodes {0}".format(evacuated_nodes))
|
||||||
|
LOG.debug("Failed to evacuate nodes {0}".format(failed_nodes))
|
||||||
notifier.notify(notify_nodes, 'success')
|
notifier.notify(notify_nodes, 'success')
|
||||||
|
failed_nodes = evac.get_nodes_details(failed_nodes)
|
||||||
|
notifier.notify(failed_nodes, 'error')
|
||||||
else:
|
else:
|
||||||
print "No nodes reported to be down"
|
print "No nodes reported to be down"
|
||||||
|
|
Loading…
Reference in New Issue