Refactor evacuators

Allow the evacuators to do more and to freely evacuate different types of workloads (full compute, vms, ...). Change-Id: I2e8b23a48504b8e4ea13f6b86cb9689d9bab5cf1 Depends-On: I5272ca90d806b8ce83055199724abdc14fe414bc
2017-07-13 20:33:48 +01:00 · 2017-07-13 20:33:48 +01:00 · b5af64bd82
parent 4d11ee77d3
commit b5af64bd82
6 changed files with 105 additions and 138 deletions
--- a/etc/freezer-dr.conf.sample
+++ b/etc/freezer-dr.conf.sample
@ -16,12 +16,6 @@
 # Note: This option can be changed without restarting.
 #debug = false
 # DEPRECATED: If set to false, the logging level will be set to WARNING instead
 # of the default INFO level. (boolean value)
 # This option is deprecated for removal.
 # Its value may be silently ignored in the future.
 #verbose = true
 # The name of a logging configuration file. This file is appended to any
 # existing logging configuration files. For details about logging configuration
 # files, see the Python logging module documentation. Note that when logging
@ -60,6 +54,12 @@
 # is set. (boolean value)
 #use_syslog = false
 # Enable journald for logging. If running in a systemd environment you may wish
 # to enable journal support. Doing so will use the journal native protocol
 # which includes structured metadata in addition to log messages. This option is
 # ignored if log_config_append is set. (boolean value)
 #use_journal = false
 # Syslog facility to receive log lines. This option is ignored if
 # log_config_append is set. (string value)
 #syslog_log_facility = LOG_USER
@ -88,7 +88,7 @@
 # List of package logging levels in logger=LEVEL pairs. This option is ignored
 # if log_config_append is set. (list value)
-#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO
+#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,oslo_messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,dogpile.core.dogpile=INFO
 # Enables or disables publication of error events. (boolean value)
 #publish_errors = false
--- a/freezer_dr/evacuators/common/driver.py
+++ b/freezer_dr/evacuators/common/driver.py
@ -22,72 +22,30 @@ class EvacuatorBaseDriver(object):
    a unified interface
    """
-    def __init__(self, wait, retries, shared_storage, **kwargs):
+    def __init__(self, nodes, evacuator_conf, fencer):
        """
        Initialize Evacuation driver with the config args
-        :param wait: time in seconds that the evcauator should wait before
+        :param nodes: A list of nodes to be evacuated!
-        retrying to disable the node
+        :param evacuator_conf: A dict of arguments that got loaded from the 
-        :param retries: Number of times the evacuator will try to disable the
+        configuration file! 
        compute node
        :param shared_storage: Boolean; True if the compute nodes are running
        on shared storage and False otherwise
        :param kwargs: Dict of arguments that any future driver may need to
        load it from the config file
        :return: None
        """
-        self.wait = wait
+        self.nodes = nodes
-        self.retries = retries
+        self.evacuator_conf = evacuator_conf
-        self.shared_storage = shared_storage
+        self.fencer = fencer
        self.options = kwargs
    @abc.abstractmethod
-    def disable_node(self, node):
+    def evacuate(self, enable_fencing=True):
-        """
+        """Evacuate the infected node.
-        Disable the compute node from accepting any new VMs or requests
+        :return: Two lists; the first one will be the succeeded nodes and the 
-        :param node: dict contains node's hostname
+        other is the failed nodes
        :return: True or False
        """
        pass
    @abc.abstractmethod
    def is_node_disabled(self, node):
        """
        Check if node is already disabled or not
        :param node: dict contains node's hostname
        :return: True or False
        """
        pass
    @abc.abstractmethod
    def evacuate_nodes(self, nodes):
        """
        Will evacuate all running VMs on the required nodes
        :param nodes: list of nodes
        :return: list of nodes with updated status
        """
        pass
    @abc.abstractmethod
    def get_node_instances(self, node):
        """
        List instances on a compute host
        :param node: dict contains node's hostname
        :return: List contains running VMs on a given node
        """
        pass
    def get_info(self):
        """
        Get Driver Information
        :return: Dict contains driver information
        """
        pass
    @abc.abstractmethod
    def get_node_status(self, node):
        """
        Check the node status and report it
        :param node: dict contains node's hostname
        :return: dict with key 'status': 'True or False'
        """
        pass
--- a/freezer_dr/evacuators/common/manager.py
+++ b/freezer_dr/evacuators/common/manager.py
@ -25,71 +25,19 @@ LOG = log.getLogger(__name__)
 class EvacuationManager(object):
    def __init__(self, enable_fencing=True):
        evcuation_conf = CONF.get('evacuation')
        self.driver = importutils.import_object(
            evcuation_conf.get('driver'),
            evcuation_conf.get('wait'),
            evcuation_conf.get('retries'),
            evcuation_conf.get('shared_storage'),
            **evcuation_conf.get('options')
        )
        self.enable_fencing = enable_fencing
        self.wait = evcuation_conf.get('wait')
        self.retires = evcuation_conf.get('retries', 1)
        if self.retires <= 0:
            self.retires = 1
    def evacuate(self, nodes):
-        # try to disable node
+        fencer = FencerManager(nodes)
-        # @todo needs more error handling like if the status didn't update or
+        evacuation_conf = CONF.get('evacuation')
-        # we are unable to disable the node ???
+        driver = importutils.import_object(
-        failed_nodes = []  # maintain nodes that are going to fail at any state
+            evacuation_conf['driver'],
-        succeeded_nodes = []
+            nodes,
-        for node in nodes:
+            evacuation_conf,
-            for i in range(0, self.retires):
+            fencer
-                status = self._disable_node(node)
+        )
                # if True ( node disabled ) break the loop
                if status:
                    break
                else:
                    status = False
            node['status'] = status
            # make sure the disable request was successful
            if not self.driver.get_node_status(node):
                failed_nodes.append(node)
                nodes.remove(node)  # if the node failed at any step no reason
                # to move it to the next step
            else:
                succeeded_nodes.append(node)
-        nodes = succeeded_nodes
+        return driver.evacuate(self.enable_fencing)
        if self.enable_fencing:
            fencer = FencerManager(nodes)
            nodes = fencer.fence()
        """
        @todo this code needs to be commented for the time being till we fix
         nova bug found in state, which always go up afer enable or disable. We
         will use get_node_details for the time being from the main script to
         get nodes details before evacuating ...
        succeeded_nodes = []
        for node in nodes:
            node['instances'] = self.driver.get_node_instances(node)
            succeeded_nodes.append(node)
        nodes = succeeded_nodes
        """
        # Start evacuation calls ...
        evacuated_nodes = []
        for i in range(0, self.retires):
            try:
                sleep(self.wait)
                nodes = self.driver.evacuate_nodes(nodes)
                if not nodes:
                    return evacuated_nodes
                evacuated_nodes = nodes
            except Exception as e:
                LOG.error(e)
        return evacuated_nodes
    def get_nodes_details(self, nodes):
        """
@ -98,9 +46,3 @@ class EvacuationManager(object):
        :return: list of node with more details
        """
        return get_nodes_details(nodes)
    def _disable_node(self, node):
        if not self.driver.is_node_disabled(node):
                return self.driver.disable_node(node)
        else:
            True
--- a/freezer_dr/evacuators/drivers/default/standard.py
+++ b/freezer_dr/evacuators/drivers/default/standard.py
@ -15,16 +15,82 @@ from oslo_config import cfg
 from oslo_log import log
 from freezer_dr.evacuators.common.driver import EvacuatorBaseDriver
 from freezer_dr.common.utils import get_os_client
 import time
 CONF = cfg.CONF
 LOG = log.getLogger(__name__)
 class StandardEvacuator(EvacuatorBaseDriver):
-    def __init__(self, wait, retires, shared_storage, **kwargs):
+    def __init__(self, nodes, evacuator_conf, fencer):
-        super(StandardEvacuator, self).__init__(wait, retires, shared_storage,
+        super(StandardEvacuator, self).__init__(nodes, evacuator_conf, fencer)
-                                                **kwargs)
+        # initialize the OS client!
        self.client = get_os_client()
        self.wait = evacuator_conf.get('wait')
        self.retires = evacuator_conf.get('retries', 1)
        if self.retires <= 0:
            self.retires = 1
    def _disable_node(self, node):
        if not self.is_node_disabled(node):
                return self.disable_node(node)
        else:
            True
    def evacuate(self, enable_fencing=True):
        # try to disable node
        # @todo needs more error handling like if the status didn't update or
        # we are unable to disable the node ???
        failed_nodes = []  # maintain nodes that are going to fail at any state
        succeeded_nodes = []
        for node in self.nodes:
            status = False
            for i in range(0, self.retires):
                status = self._disable_node(node)
                # if True ( node disabled ) break the loop
                if status:
                    break
                else:
                    status = False
            node['status'] = status
            # make sure the disable request was successful
            if not self.get_node_status(node):
                # if the node failed at any step no reason to move it to
                # the next step
                failed_nodes.append(node)
                self.nodes.remove(node)  #
            else:
                succeeded_nodes.append(node)
        nodes = succeeded_nodes
        if enable_fencing:
            self.fencer.update_nodes(nodes)
            nodes = self.fencer.fence()
        """
        @todo this code needs to be commented for the time being till we fix
         nova bug found in state, which always go up afer enable or disable. We
         will use get_node_details for the time being from the main script to
         get nodes details before evacuating ...
        succeeded_nodes = []
        for node in nodes:
            node['instances'] = self.driver.get_node_instances(node)
            succeeded_nodes.append(node)
        nodes = succeeded_nodes
        """
        # Start evacuation calls ...
        evacuated_nodes = []
        for i in range(0, self.retires):
            try:
                time.sleep(self.wait)
                nodes = self.evacuate_nodes(nodes)
                if not nodes:
                    break
                evacuated_nodes = nodes
            except Exception as e:
                LOG.error(e)
        return evacuated_nodes, failed_nodes
    def get_node_instances(self, node):
        return self.client.get_hypervisor_instances(node)
@ -39,7 +105,5 @@ class StandardEvacuator(EvacuatorBaseDriver):
        return self.client.get_node_status(node)
    def evacuate_nodes(self, nodes):
-        return self.client.evacuate(nodes, shared_storage=self.shared_storage)
+        return self.client.evacuate(
-
+            nodes, shared_storage=self.evacuator_conf['shared_storage'])
--- a/freezer_dr/evacuators/drivers/dummy/dummy.py
+++ b/freezer_dr/evacuators/drivers/dummy/dummy.py
@ -19,9 +19,8 @@ class DummyEvacuator(EvacuatorBaseDriver):
    of Freezer-DR.
    """
-    def __init__(self, wait, retires, shared_storage, **kwargs):
+    def __init__(self, nodes, evacuator_conf, fencer):
-        super(DummyEvacuator, self).__init__(wait, retires, shared_storage,
+        super(DummyEvacuator, self).__init__(nodes, evacuator_conf, fencer)
                                             **kwargs)
    def disable_node(self, node):
        return True
--- a/freezer_dr/main.py
+++ b/freezer_dr/main.py
@ -41,7 +41,11 @@ def main():
        # Shutdown the node
        evac = EvacuationManager()
        notify_nodes = evac.get_nodes_details(nodes)
-        evac.evacuate(nodes)
+        evacuated_nodes, failed_nodes = evac.evacuate(nodes)
        LOG.debug("Successfully evacuated nodes {0}".format(evacuated_nodes))
        LOG.debug("Failed to evacuate nodes {0}".format(failed_nodes))
        notifier.notify(notify_nodes, 'success')
        failed_nodes = evac.get_nodes_details(failed_nodes)
        notifier.notify(failed_nodes, 'error')
    else:
        print "No nodes reported to be down"