Merge "Refactor evacuators"

This commit is contained in:
Jenkins 2017-07-14 09:07:26 +00:00 committed by Gerrit Code Review
commit 56efab7b54
6 changed files with 105 additions and 138 deletions

View File

@ -16,12 +16,6 @@
# Note: This option can be changed without restarting.
#debug = false
# DEPRECATED: If set to false, the logging level will be set to WARNING instead
# of the default INFO level. (boolean value)
# This option is deprecated for removal.
# Its value may be silently ignored in the future.
#verbose = true
# The name of a logging configuration file. This file is appended to any
# existing logging configuration files. For details about logging configuration
# files, see the Python logging module documentation. Note that when logging
@ -60,6 +54,12 @@
# is set. (boolean value)
#use_syslog = false
# Enable journald for logging. If running in a systemd environment you may wish
# to enable journal support. Doing so will use the journal native protocol
# which includes structured metadata in addition to log messages. This option is
# ignored if log_config_append is set. (boolean value)
#use_journal = false
# Syslog facility to receive log lines. This option is ignored if
# log_config_append is set. (string value)
#syslog_log_facility = LOG_USER
@ -88,7 +88,7 @@
# List of package logging levels in logger=LEVEL pairs. This option is ignored
# if log_config_append is set. (list value)
#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO
#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,oslo_messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO
# Enables or disables publication of error events. (boolean value)
#publish_errors = false

View File

@ -22,72 +22,30 @@ class EvacuatorBaseDriver(object):
a unified interface
"""
def __init__(self, wait, retries, shared_storage, **kwargs):
def __init__(self, nodes, evacuator_conf, fencer):
"""
Initialize Evacuation driver with the config args
:param wait: time in seconds that the evacuator should wait before
retrying to disable the node
:param retries: Number of times the evacuator will try to disable the
compute node
:param shared_storage: Boolean; True if the compute nodes are running
on shared storage and False otherwise
:param kwargs: Dict of arguments that any future driver may need to
load it from the config file
:param nodes: A list of nodes to be evacuated.
:param evacuator_conf: A dict of arguments that got loaded from the
configuration file.
:return: None
"""
self.wait = wait
self.retries = retries
self.shared_storage = shared_storage
self.options = kwargs
self.nodes = nodes
self.evacuator_conf = evacuator_conf
self.fencer = fencer
@abc.abstractmethod
def disable_node(self, node):
"""
Disable the compute node from accepting any new VMs or requests
:param node: dict contains node's hostname
:return: True or False
def evacuate(self, enable_fencing=True):
"""Evacuate the infected node.
:return: Two lists; the first one will be the succeeded nodes and the
other is the failed nodes
"""
pass
@abc.abstractmethod
def is_node_disabled(self, node):
"""
Check if node is already disabled or not
:param node: dict contains node's hostname
:return: True or False
"""
pass
@abc.abstractmethod
def evacuate_nodes(self, nodes):
"""
Will evacuate all running VMs on the required nodes
:param nodes: list of nodes
:return: list of nodes with updated status
"""
pass
@abc.abstractmethod
def get_node_instances(self, node):
"""
List instances on a compute host
:param node: dict contains node's hostname
:return: List contains running VMs on a given node
"""
pass
def get_info(self):
"""
Get Driver Information
:return: Dict contains driver information
"""
pass
@abc.abstractmethod
def get_node_status(self, node):
"""
Check the node status and report it
:param node: dict contains node's hostname
:return: dict with key 'status': 'True or False'
"""
pass

View File

@ -25,71 +25,19 @@ LOG = log.getLogger(__name__)
class EvacuationManager(object):
def __init__(self, enable_fencing=True):
evcuation_conf = CONF.get('evacuation')
self.driver = importutils.import_object(
evcuation_conf.get('driver'),
evcuation_conf.get('wait'),
evcuation_conf.get('retries'),
evcuation_conf.get('shared_storage'),
**evcuation_conf.get('options')
)
self.enable_fencing = enable_fencing
self.wait = evcuation_conf.get('wait')
self.retires = evcuation_conf.get('retries', 1)
if self.retires <= 0:
self.retires = 1
def evacuate(self, nodes):
# try to disable node
# @todo needs more error handling like if the status didn't update or
# we are unable to disable the node ???
failed_nodes = [] # maintain nodes that are going to fail at any state
succeeded_nodes = []
for node in nodes:
for i in range(0, self.retires):
status = self._disable_node(node)
# if True ( node disabled ) break the loop
if status:
break
else:
status = False
node['status'] = status
# make sure the disable request was successful
if not self.driver.get_node_status(node):
failed_nodes.append(node)
nodes.remove(node) # if the node failed at any step no reason
# to move it to the next step
else:
succeeded_nodes.append(node)
fencer = FencerManager(nodes)
evacuation_conf = CONF.get('evacuation')
driver = importutils.import_object(
evacuation_conf['driver'],
nodes,
evacuation_conf,
fencer
)
nodes = succeeded_nodes
if self.enable_fencing:
fencer = FencerManager(nodes)
nodes = fencer.fence()
"""
@todo this code needs to be commented for the time being till we fix
nova bug found in state, which always goes up after enable or disable. We
will use get_node_details for the time being from the main script to
get nodes details before evacuating ...
succeeded_nodes = []
for node in nodes:
node['instances'] = self.driver.get_node_instances(node)
succeeded_nodes.append(node)
nodes = succeeded_nodes
"""
# Start evacuation calls ...
evacuated_nodes = []
for i in range(0, self.retires):
try:
sleep(self.wait)
nodes = self.driver.evacuate_nodes(nodes)
if not nodes:
return evacuated_nodes
evacuated_nodes = nodes
except Exception as e:
LOG.error(e)
return evacuated_nodes
return driver.evacuate(self.enable_fencing)
def get_nodes_details(self, nodes):
"""
@ -98,9 +46,3 @@ class EvacuationManager(object):
:return: list of node with more details
"""
return get_nodes_details(nodes)
def _disable_node(self, node):
if not self.driver.is_node_disabled(node):
return self.driver.disable_node(node)
else:
True

View File

@ -15,16 +15,82 @@ from oslo_config import cfg
from oslo_log import log
from freezer_dr.evacuators.common.driver import EvacuatorBaseDriver
from freezer_dr.common.utils import get_os_client
import time
CONF = cfg.CONF
LOG = log.getLogger(__name__)
class StandardEvacuator(EvacuatorBaseDriver):
def __init__(self, wait, retires, shared_storage, **kwargs):
super(StandardEvacuator, self).__init__(wait, retires, shared_storage,
**kwargs)
def __init__(self, nodes, evacuator_conf, fencer):
super(StandardEvacuator, self).__init__(nodes, evacuator_conf, fencer)
# initialize the OS client!
self.client = get_os_client()
self.wait = evacuator_conf.get('wait')
self.retires = evacuator_conf.get('retries', 1)
if self.retires <= 0:
self.retires = 1
def _disable_node(self, node):
if not self.is_node_disabled(node):
return self.disable_node(node)
else:
True
def evacuate(self, enable_fencing=True):
# try to disable node
# @todo needs more error handling like if the status didn't update or
# we are unable to disable the node ???
failed_nodes = [] # maintain nodes that are going to fail at any state
succeeded_nodes = []
for node in self.nodes:
status = False
for i in range(0, self.retires):
status = self._disable_node(node)
# if True ( node disabled ) break the loop
if status:
break
else:
status = False
node['status'] = status
# make sure the disable request was successful
if not self.get_node_status(node):
# if the node failed at any step no reason to move it to
# the next step
failed_nodes.append(node)
self.nodes.remove(node) #
else:
succeeded_nodes.append(node)
nodes = succeeded_nodes
if enable_fencing:
self.fencer.update_nodes(nodes)
nodes = self.fencer.fence()
"""
@todo this code needs to be commented for the time being till we fix
nova bug found in state, which always goes up after enable or disable. We
will use get_node_details for the time being from the main script to
get nodes details before evacuating ...
succeeded_nodes = []
for node in nodes:
node['instances'] = self.driver.get_node_instances(node)
succeeded_nodes.append(node)
nodes = succeeded_nodes
"""
# Start evacuation calls ...
evacuated_nodes = []
for i in range(0, self.retires):
try:
time.sleep(self.wait)
nodes = self.evacuate_nodes(nodes)
if not nodes:
break
evacuated_nodes = nodes
except Exception as e:
LOG.error(e)
return evacuated_nodes, failed_nodes
def get_node_instances(self, node):
return self.client.get_hypervisor_instances(node)
@ -39,7 +105,5 @@ class StandardEvacuator(EvacuatorBaseDriver):
return self.client.get_node_status(node)
def evacuate_nodes(self, nodes):
return self.client.evacuate(nodes, shared_storage=self.shared_storage)
return self.client.evacuate(
nodes, shared_storage=self.evacuator_conf['shared_storage'])

View File

@ -19,9 +19,8 @@ class DummyEvacuator(EvacuatorBaseDriver):
of Freezer-DR.
"""
def __init__(self, wait, retires, shared_storage, **kwargs):
super(DummyEvacuator, self).__init__(wait, retires, shared_storage,
**kwargs)
def __init__(self, nodes, evacuator_conf, fencer):
super(DummyEvacuator, self).__init__(nodes, evacuator_conf, fencer)
def disable_node(self, node):
return True

View File

@ -41,7 +41,11 @@ def main():
# Shutdown the node
evac = EvacuationManager()
notify_nodes = evac.get_nodes_details(nodes)
evac.evacuate(nodes)
evacuated_nodes, failed_nodes = evac.evacuate(nodes)
LOG.debug("Successfully evacuated nodes {0}".format(evacuated_nodes))
LOG.debug("Failed to evacuate nodes {0}".format(failed_nodes))
notifier.notify(notify_nodes, 'success')
failed_nodes = evac.get_nodes_details(failed_nodes)
notifier.notify(failed_nodes, 'error')
else:
print "No nodes reported to be down"