fuel-main/fuelweb_test/tests/tests_strength/test_failover.py

# Copyright 2013 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import re
import time

from devops.helpers.helpers import wait
from proboscis.asserts import assert_equal
from proboscis.asserts import assert_not_equal
from proboscis.asserts import assert_true
from proboscis import test

from fuelweb_test.helpers.decorators import log_snapshot_on_error
from fuelweb_test import logger
from fuelweb_test.settings import DEPLOYMENT_MODE_HA
from fuelweb_test.tests.base_test_case import SetupEnvironment
from fuelweb_test.tests.base_test_case import TestBasic


@test(groups=["thread_5", "ha"])
class TestHaFailover(TestBasic):
    @test(depends_on=[SetupEnvironment.prepare_slaves_5],
          groups=["deploy_ha"])
    @log_snapshot_on_error
    def deploy_ha(self):
        """Deploy cluster in HA mode with flat nova-network

        Scenario:
            1. Create cluster
            2. Add 3 nodes with controller roles
            3. Add 2 nodes with compute roles
            4. Deploy the cluster
            5. Make snapshot

        Snapshot deploy_ha

        """
        self.env.revert_snapshot("ready_with_5_slaves")

        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=DEPLOYMENT_MODE_HA
        )
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller'],
                'slave-02': ['controller'],
                'slave-03': ['controller'],
                'slave-04': ['compute'],
                'slave-05': ['compute']
            }
        )
        self.fuel_web.deploy_cluster_wait(cluster_id)
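
        # Persist the deployed environment as the "deploy_ha" snapshot so the
        # failover tests below can revert to a known-good HA cluster.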
        self.env.make_snapshot("deploy_ha", is_make=True)

    @test(depends_on_groups=['deploy_ha'],
          groups=["ha_destroy_controllers"])
    @log_snapshot_on_error
    def ha_destroy_controllers(self):
        """Destroy two controllers and check that pacemaker status is correct

        Scenario:
            1. Destroy first controller
            2. Check pacemaker status
            3. Revert environment
            4. Destroy second controller
            5. Check pacemaker status

        Snapshot deploy_ha

        """
        for devops_node in self.env.nodes().slaves[:2]:
            self.env.revert_snapshot("deploy_ha")

            devops_node.suspend(False)
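            # The two remaining controllers should still form a quorum and
            # report the destroyed node as offline in the pacemaker status.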
            self.fuel_web.assert_pacemaker(
                self.env.nodes().slaves[2].name,
                set(self.env.nodes().slaves[:3]) - {devops_node},
                [devops_node])

    @test(depends_on_groups=['deploy_ha'],
          groups=["ha_disconnect_controllers"])
    @log_snapshot_on_error
    def ha_disconnect_controllers(self):
        """Disconnect controllers and check that pacemaker status is correct

        Scenario:
            1. Disconnect eth2 of the first controller
            2. Check pacemaker status
            3. Revert environment
            4. Disconnect eth2 of the second controller
            5. Check pacemaker status

        Snapshot deploy_ha

        """
        for devops_node in self.env.nodes().slaves[:2]:
            self.env.revert_snapshot("deploy_ha")

            remote = self.fuel_web.get_ssh_for_node(devops_node.name)
            remote.check_call('ifconfig eth2 down')
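            # With its interface down the node is cut off from the cluster;
            # the surviving controllers should mark it as offline in pacemaker.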
            self.fuel_web.assert_pacemaker(
                self.env.nodes().slaves[2].name,
                set(self.env.nodes().slaves[:3]) - {devops_node},
                [devops_node])

    @test(depends_on_groups=['deploy_ha'],
          groups=["ha_delete_vips"])
    @log_snapshot_on_error
    def ha_delete_vips(self):
        """Delete all management and public VIPs on all controller nodes.
        Verify that they are restored.
        Verify that the total number of secondary IPs is 2:
        management and public.

        Scenario:
            1. Delete all secondary VIPs
            2. Wait while they are being restored
            3. Verify they are restored

        Snapshot deploy_ha

        """
        logger.debug('Start reverting of deploy_ha snapshot')
        self.env.revert_snapshot("deploy_ha")
        cluster_id = \
            self.fuel_web.client.get_cluster_id(self.__class__.__name__)
        logger.debug('Cluster id is {0}'.format(cluster_id))
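        # hapr-p and hapr-m are the interfaces that carry the public and
        # management VIPs inside the "haproxy" network namespace.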
        interfaces = ('hapr-p', 'hapr-m')
        slaves = self.env.nodes().slaves[:3]
        logger.debug("Current nodes are {0}".format([i.name for i in slaves]))
        ips_amount = 0
        for devops_node in slaves:
            # Verify VIPs are started.
            ret = self.fuel_web.get_pacemaker_status(devops_node.name)
            logger.debug("Pacemaker status {0} for node {1}".format(
                ret, devops_node.name))
            assert_true(
                re.search('vip__management_old\s+\(ocf::mirantis:ns_IPaddr2\):'
                          '\s+Started node', ret),
                'vip management not started. '
                'Current pacemaker status is {0}'.format(ret))
            assert_true(
                re.search('vip__public_old\s+\(ocf::mirantis:ns_IPaddr2\):'
                          '\s+Started node', ret),
                'vip public not started. '
                'Current pacemaker status is {0}'.format(ret))
            for interface in interfaces:
                # Look for management and public ip in namespace and remove it
                logger.debug("Start looking for the VIP address")
                addresses = self.fuel_web.ip_address_show(
                    devops_node.name, interface=interface,
                    namespace='haproxy',
                    pipe_str='| grep {0}$'.format(interface))
                logger.debug("VIP addresses are {0} for node {1} and "
                             "interface {2}".format(addresses,
                                                    devops_node.name,
                                                    interface))
                ip_search = re.search(
                    'inet (?P<ip>\d+\.\d+\.\d+.\d+/\d+) scope global '
                    '{0}'.format(interface), addresses)
                if ip_search is None:
                    logger.debug("Ip show output does not"
                                 " match the regex. Current value is None")
                    continue
                ip = ip_search.group('ip')
                logger.debug("Found ip {0}".format(ip))
                logger.debug("Start ip {0} deletion on node {1} and "
                             "interface {2} ".format(ip, devops_node.name,
                                                     interface))
                self.fuel_web.ip_address_del(
                    node_name=devops_node.name,
                    interface=interface,
                    ip=ip, namespace='haproxy')
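
                # Pacemaker may bring the deleted VIP back on any of the three
                # controllers, so poll every node in the cluster.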
                # The ip should be restored
                ip_assigned = lambda nodes: \
                    any([ip in self.fuel_web.ip_address_show(
                        n.name, 'haproxy',
                        interface, '| grep {0}$'.format(interface))
                        for n in nodes])
                logger.debug("Waiting while deleted ip is restored ...")
                wait(lambda: ip_assigned(slaves), timeout=30)
                assert_true(ip_assigned(slaves),
                            "IP is not restored.")
                ips_amount += 1

                time.sleep(60)
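
        # At this point every VIP that was found and deleted should have been
        # restored by pacemaker on one of the controllers.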
        # Run OSTF tests
        failed_test_name = ['Create volume and boot instance from it',
                            'Create volume and attach it to instance']
        self.fuel_web.run_ostf(
            cluster_id=cluster_id,
            test_sets=['ha', 'smoke', 'sanity'],
            should_fail=3,
            failed_test_name=failed_test_name)
        # Revert initial state. VIP could be moved to other controller
        self.env.revert_snapshot("deploy_ha")
        assert_equal(ips_amount, 2,
                     'Not all VIPs were restored within 30 seconds')

    @test(depends_on_groups=['deploy_ha'],
          groups=["ha_mysql_termination"])
    @log_snapshot_on_error
    def ha_mysql_termination(self):
        """Terminate mysql on all controllers one by one

        Scenario:
            1. Terminate mysql
            2. Wait while it is being restarted
            3. Verify it is restarted
            4. Go to another controller

        Snapshot deploy_ha

        """
        self.env.revert_snapshot("deploy_ha")

        for devops_node in self.env.nodes().slaves[:3]:
            remote = self.fuel_web.get_ssh_for_node(devops_node.name)
            remote.check_call('kill -9 $(pidof -x mysqld_safe)')
            remote.check_call('kill -9 $(pidof mysqld)')
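
            # Pacemaker is expected to respawn mysqld. The check below counts
            # the lines returned by "ps aux | grep"; the expected count of 3
            # only holds once mysqld is running again (the grep pipeline
            # itself also shows up in the output).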
            mysql_started = lambda: \
                len(remote.check_call(
                    'ps aux | grep "/usr/sbin/mysql"')['stdout']) == 3
            wait(mysql_started, timeout=300)
            assert_true(mysql_started(), 'MySQL was not restarted')

    @test(depends_on_groups=['deploy_ha'],
          groups=["ha_haproxy_termination"])
    @log_snapshot_on_error
    def ha_haproxy_termination(self):
        """Terminate haproxy on all controllers one by one

        Scenario:
            1. Terminate haproxy
            2. Wait while it is being restarted
            3. Verify it is restarted
            4. Go to another controller

        Snapshot deploy_ha

        """
        self.env.revert_snapshot("deploy_ha")

        for devops_node in self.env.nodes().slaves[:3]:
            remote = self.fuel_web.get_ssh_for_node(devops_node.name)
            remote.check_call('kill -9 $(pidof haproxy)')
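
            # Pacemaker should restart the killed haproxy; wait until it shows
            # up in the process list again before moving to the next node.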
            haproxy_started = lambda: \
                len(remote.check_call(
                    'ps aux | grep "/usr/sbin/haproxy"')['stdout']) == 3
            wait(haproxy_started, timeout=20)
            assert_true(haproxy_started(), 'haproxy was not restarted')

    @test(depends_on_groups=['deploy_ha'],
          groups=["ha_pacemaker_configuration"])
    @log_snapshot_on_error
    def ha_pacemaker_configuration(self):
        """Verify resources are configured

        Scenario:
            1. SSH to controller node
            2. Verify resources are configured
            3. Go to next controller

        Snapshot deploy_ha

        """
        self.env.revert_snapshot("deploy_ha")

        devops_ctrls = self.env.nodes().slaves[:3]
        for devops_node in devops_ctrls:
            config = self.fuel_web.get_pacemaker_config(devops_node.name)
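            # Every controller must be registered as a cluster node, and the
            # core HA primitives (heat-engine, haproxy, mysql, both VIPs)
            # must be present in the pacemaker configuration.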
            for n in devops_ctrls:
                fqdn = self.fuel_web.fqdn(n)
                assert_true(
                    'node {0}'.format(fqdn) in config,
                    'node {0} is missing from the pacemaker config'.format(
                        fqdn))
            assert_not_equal(
                re.search('primitive (openstack-)?heat-engine', config), None,
                'heat engine')
            assert_true('primitive p_haproxy' in config, 'haproxy')
            assert_true('primitive p_mysql' in config, 'mysql')
            assert_true(
                'primitive vip__management_old' in config, 'vip management')
            assert_true(
                'primitive vip__public_old' in config, 'vip public')