diff --git a/etc/freezer-dr.conf.sample b/etc/freezer-dr.conf.sample index f25883e..de1b693 100644 --- a/etc/freezer-dr.conf.sample +++ b/etc/freezer-dr.conf.sample @@ -16,12 +16,6 @@ # Note: This option can be changed without restarting. #debug = false -# DEPRECATED: If set to false, the logging level will be set to WARNING instead -# of the default INFO level. (boolean value) -# This option is deprecated for removal. -# Its value may be silently ignored in the future. -#verbose = true - # The name of a logging configuration file. This file is appended to any # existing logging configuration files. For details about logging configuration # files, see the Python logging module documentation. Note that when logging @@ -60,6 +54,12 @@ # is set. (boolean value) #use_syslog = false +# Enable journald for logging. If running in a systemd environment you may wish +# to enable journal support. Doing so will use the journal native protocol +# which includes structured metadata in addition to log messages. This option is +# ignored if log_config_append is set. (boolean value) +#use_journal = false + # Syslog facility to receive log lines. This option is ignored if # log_config_append is set. (string value) #syslog_log_facility = LOG_USER @@ -88,7 +88,7 @@ # List of package logging levels in logger=LEVEL pairs. This option is ignored # if log_config_append is set. 
(list value) -#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO +#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,oslo_messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO # Enables or disables publication of error events. (boolean value) #publish_errors = false diff --git a/freezer_dr/evacuators/common/driver.py b/freezer_dr/evacuators/common/driver.py index 3be2054..1596721 100644 --- a/freezer_dr/evacuators/common/driver.py +++ b/freezer_dr/evacuators/common/driver.py @@ -22,72 +22,30 @@ class EvacuatorBaseDriver(object): a unified interface """ - def __init__(self, wait, retries, shared_storage, **kwargs): + def __init__(self, nodes, evacuator_conf, fencer): """ Initialize Evacuation driver with the config args - :param wait: time in seconds that the evcauator should wait before - retrying to disable the node - :param retries: Number of times the evacuator will try to disable the - compute node - :param shared_storage: Boolean; True if the compute nodes are running - on shared storage and False otherwise - :param kwargs: Dict of arguments that any future driver may need to - load it from the config file + :param nodes: A list of nodes to be evacuated! + :param evacuator_conf: A dict of arguments that got loaded from the + configuration file! 
:return: None """ - self.wait = wait - self.retries = retries - self.shared_storage = shared_storage - self.options = kwargs + self.nodes = nodes + self.evacuator_conf = evacuator_conf + self.fencer = fencer @abc.abstractmethod - def disable_node(self, node): - """ - Disable the compute node from accepting any new VMs or requests - :param node: dict contains node's hostname - :return: True pr False + def evacuate(self, enable_fencing=True): + """Evacuate the infected node. + :return: Two lists; the first one will be the succeeded nodes and the + other is the failed nodes """ pass @abc.abstractmethod - def is_node_disabled(self, node): - """ - Check if node is already disabled or not - :param node: dict contains node's hostname - :return: True or False - """ - pass - - @abc.abstractmethod - def evacuate_nodes(self, nodes): - """ - Will evacuate all running VMs on the required nodes - :param nodes: list of nodes - :return: list of nodes with updated status - """ - pass - - @abc.abstractmethod - def get_node_instances(self, node): - """ - List instances on a compute host - :param node: dict contains node's hostname - :return: List contains running VMs on a given node - """ - pass - def get_info(self): """ Get Driver Information :return: Dict contains driver information """ pass - - @abc.abstractmethod - def get_node_status(self, node): - """ - Check the node status and report it - :param node: dict contains node's hostname - :return: dict with key 'status': 'True or False' - """ - pass diff --git a/freezer_dr/evacuators/common/manager.py b/freezer_dr/evacuators/common/manager.py index 1601403..9f19174 100644 --- a/freezer_dr/evacuators/common/manager.py +++ b/freezer_dr/evacuators/common/manager.py @@ -25,71 +25,19 @@ LOG = log.getLogger(__name__) class EvacuationManager(object): def __init__(self, enable_fencing=True): - evcuation_conf = CONF.get('evacuation') - self.driver = importutils.import_object( - evcuation_conf.get('driver'), - evcuation_conf.get('wait'), - 
evcuation_conf.get('retries'), - evcuation_conf.get('shared_storage'), - **evcuation_conf.get('options') - ) self.enable_fencing = enable_fencing - self.wait = evcuation_conf.get('wait') - self.retires = evcuation_conf.get('retries', 1) - if self.retires <= 0: - self.retires = 1 def evacuate(self, nodes): - # try to disable node - # @todo needs more error handling like if the status didn't update or - # we are unable to disable the node ??? - failed_nodes = [] # maintain nodes that are going to fail at any state - succeeded_nodes = [] - for node in nodes: - for i in range(0, self.retires): - status = self._disable_node(node) - # if True ( node disabled ) break the loop - if status: - break - else: - status = False - node['status'] = status - # make sure the disable request was successful - if not self.driver.get_node_status(node): - failed_nodes.append(node) - nodes.remove(node) # if the node failed at any step no reason - # to move it to the next step - else: - succeeded_nodes.append(node) + fencer = FencerManager(nodes) + evacuation_conf = CONF.get('evacuation') + driver = importutils.import_object( + evacuation_conf['driver'], + nodes, + evacuation_conf, + fencer + ) - nodes = succeeded_nodes - if self.enable_fencing: - fencer = FencerManager(nodes) - nodes = fencer.fence() - """ - @todo this code needs to be commented for the time being till we fix - nova bug found in state, which always go up afer enable or disable. We - will use get_node_details for the time being from the main script to - get nodes details before evacuating ... - succeeded_nodes = [] - for node in nodes: - node['instances'] = self.driver.get_node_instances(node) - succeeded_nodes.append(node) - - nodes = succeeded_nodes - """ - # Start evacuation calls ... 
- evacuated_nodes = [] - for i in range(0, self.retires): - try: - sleep(self.wait) - nodes = self.driver.evacuate_nodes(nodes) - if not nodes: - return evacuated_nodes - evacuated_nodes = nodes - except Exception as e: - LOG.error(e) - return evacuated_nodes + return driver.evacuate(self.enable_fencing) def get_nodes_details(self, nodes): """ @@ -98,9 +46,3 @@ class EvacuationManager(object): :return: list of node with more details """ return get_nodes_details(nodes) - - def _disable_node(self, node): - if not self.driver.is_node_disabled(node): - return self.driver.disable_node(node) - else: - True diff --git a/freezer_dr/evacuators/drivers/default/standard.py b/freezer_dr/evacuators/drivers/default/standard.py index ec6aaaf..8eff31c 100644 --- a/freezer_dr/evacuators/drivers/default/standard.py +++ b/freezer_dr/evacuators/drivers/default/standard.py @@ -15,16 +15,82 @@ from oslo_config import cfg from oslo_log import log from freezer_dr.evacuators.common.driver import EvacuatorBaseDriver from freezer_dr.common.utils import get_os_client +import time CONF = cfg.CONF LOG = log.getLogger(__name__) class StandardEvacuator(EvacuatorBaseDriver): - def __init__(self, wait, retires, shared_storage, **kwargs): - super(StandardEvacuator, self).__init__(wait, retires, shared_storage, - **kwargs) + def __init__(self, nodes, evacuator_conf, fencer): + super(StandardEvacuator, self).__init__(nodes, evacuator_conf, fencer) + # initialize the OS client! self.client = get_os_client() + self.wait = evacuator_conf.get('wait') + self.retires = evacuator_conf.get('retries', 1) + if self.retires <= 0: + self.retires = 1 + + def _disable_node(self, node): + if not self.is_node_disabled(node): + return self.disable_node(node) + else: + True + + def evacuate(self, enable_fencing=True): + # try to disable node + # @todo needs more error handling like if the status didn't update or + # we are unable to disable the node ??? 
+ failed_nodes = [] # maintain nodes that are going to fail at any state + succeeded_nodes = [] + for node in self.nodes: + status = False + for i in range(0, self.retires): + status = self._disable_node(node) + # if True ( node disabled ) break the loop + if status: + break + else: + status = False + node['status'] = status + # make sure the disable request was successful + if not self.get_node_status(node): + # if the node failed at any step no reason to move it to + # the next step + failed_nodes.append(node) + self.nodes.remove(node) # + else: + succeeded_nodes.append(node) + + nodes = succeeded_nodes + if enable_fencing: + self.fencer.update_nodes(nodes) + nodes = self.fencer.fence() + """ + @todo this code needs to be commented for the time being till we fix + nova bug found in state, which always go up afer enable or disable. We + will use get_node_details for the time being from the main script to + get nodes details before evacuating ... + succeeded_nodes = [] + for node in nodes: + node['instances'] = self.driver.get_node_instances(node) + succeeded_nodes.append(node) + + nodes = succeeded_nodes + """ + # Start evacuation calls ... 
+ evacuated_nodes = [] + for i in range(0, self.retires): + try: + time.sleep(self.wait) + nodes = self.evacuate_nodes(nodes) + if not nodes: + break + evacuated_nodes = nodes + except Exception as e: + LOG.error(e) + + return evacuated_nodes, failed_nodes def get_node_instances(self, node): return self.client.get_hypervisor_instances(node) @@ -39,7 +105,5 @@ class StandardEvacuator(EvacuatorBaseDriver): return self.client.get_node_status(node) def evacuate_nodes(self, nodes): - return self.client.evacuate(nodes, shared_storage=self.shared_storage) - - - + return self.client.evacuate( + nodes, shared_storage=self.evacuator_conf['shared_storage']) diff --git a/freezer_dr/evacuators/drivers/dummy/dummy.py b/freezer_dr/evacuators/drivers/dummy/dummy.py index 65c5f18..93f2bc3 100644 --- a/freezer_dr/evacuators/drivers/dummy/dummy.py +++ b/freezer_dr/evacuators/drivers/dummy/dummy.py @@ -19,9 +19,8 @@ class DummyEvacuator(EvacuatorBaseDriver): of Freezer-DR. """ - def __init__(self, wait, retires, shared_storage, **kwargs): - super(DummyEvacuator, self).__init__(wait, retires, shared_storage, - **kwargs) + def __init__(self, nodes, evacuator_conf, fencer): + super(DummyEvacuator, self).__init__(nodes, evacuator_conf, fencer) def disable_node(self, node): return True diff --git a/freezer_dr/main.py b/freezer_dr/main.py index dc07244..992deb5 100644 --- a/freezer_dr/main.py +++ b/freezer_dr/main.py @@ -41,7 +41,11 @@ def main(): # Shutdown the node evac = EvacuationManager() notify_nodes = evac.get_nodes_details(nodes) - evac.evacuate(nodes) + evacuated_nodes, failed_nodes = evac.evacuate(nodes) + LOG.debug("Successfully evacuated nodes {0}".format(evacuated_nodes)) + LOG.debug("Failed to evacuate nodes {0}".format(failed_nodes)) notifier.notify(notify_nodes, 'success') + failed_nodes = evac.get_nodes_details(failed_nodes) + notifier.notify(failed_nodes, 'error') else: print "No nodes reported to be down"