fuel-ostf/fuel_health/saharamanager.py

228 lines
9.6 KiB
Python

# Copyright 2013 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import time
from fuel_health.common.utils.data_utils import rand_name
from fuel_health import nmanager
LOG = logging.getLogger(__name__)
class SaharaTestsManager(nmanager.PlatformServicesBaseClass):
def setUp(self):
super(SaharaTestsManager, self).setUp()
self.check_clients_state()
# Timeout (in seconds) to wait for cluster deployment.
self.cluster_timeout = 3000
# Timeout (in seconds) to wait for cluster deletion.
self.delete_timeout = 300
# Timeout (in seconds) between status checks.
self.request_timeout = 5
# Timeout (in seconds) to wait for starting a Hadoop process
# on a cluster node.
self.process_timeout = 300
# The minimum amount of available RAM for one of the compute nodes
# to run Sahara platform tests.
self.min_required_ram_mb = 4096
# The path to the file where a SSH private key for Sahara tests
# will be located.
self.path_to_private_key = '/tmp/sahara-ostf.pem'
def create_flavor(self, ram=1024, vcpus=1, disk=20):
"""This method creates a flavor for Sahara tests.
All resources created by this method will be automatically deleted.
"""
LOG.debug('Creating flavor for Sahara tests...')
name = rand_name('sahara-flavor-')
flavor = self.compute_client.flavors.create(name, ram, vcpus, disk)
self.addCleanup(self.compute_client.flavors.delete, flavor.id)
LOG.debug('Flavor for Sahara tests has been created.')
return flavor.id
def _create_key_pair(self):
"""This method creates a key pair for Sahara platform tests.
All resources created by this method will be automatically deleted.
"""
LOG.debug('Creating key pair for Sahara tests...')
name = rand_name('sahara-key-pair-')
key_pair = self.compute_client.keypairs.create(name)
self.addCleanup(key_pair.delete)
self._run_ssh_cmd('echo "{0}" > {1}'.format(key_pair.private_key,
self.path_to_private_key))
LOG.debug('Key pair for Sahara tests has been created.')
return name
# Methods for creating Sahara resources.
def create_cluster_template(self, name, plugin,
hadoop_version, node_groups, **kwargs):
"""This method creates a cluster template.
It supports passing additional params using **kwargs and returns ID
of created resource. All resources created by this method will be
automatically deleted.
"""
LOG.debug('Creating cluster template with name "{0}"...'.format(name))
# TODO(ylobankov): remove this loop after fixing bug #1314578
for node_group in node_groups:
if 'floating_ip_pool' in node_group:
if node_group['floating_ip_pool'] is None:
del node_group['floating_ip_pool']
cl_template = self.sahara_client.cluster_templates.create(
name, plugin, hadoop_version, node_groups=node_groups, **kwargs)
self.addCleanup(
self.delete_resource,
delete_method=lambda: self.sahara_client.cluster_templates.delete(
cl_template.id),
get_method=lambda: self.sahara_client.cluster_templates.get(
cl_template.id),
timeout=self.delete_timeout, sleep=self.request_timeout)
LOG.debug('Cluster template "{0}" has been created.'.format(name))
return cl_template.id
def create_cluster(self, name, plugin, hadoop_version,
default_image_id, node_groups=None, **kwargs):
"""This method creates a cluster.
It supports passing additional params using **kwargs and returns ID
of created resource. All resources created by this method will be
automatically deleted.
"""
key_pair_name = self._create_key_pair()
LOG.debug('Creating cluster with name "{0}"...'.format(name))
cluster = self.sahara_client.clusters.create(
name, plugin, hadoop_version, default_image_id=default_image_id,
user_keypair_id=key_pair_name, node_groups=node_groups, **kwargs)
self.addCleanup(
self.delete_resource,
delete_method=lambda: self.sahara_client.clusters.delete(
cluster.id),
get_method=lambda: self.sahara_client.clusters.get(cluster.id),
timeout=self.delete_timeout, sleep=self.request_timeout)
LOG.debug('Cluster "{0}" has been created.'.format(name))
return cluster.id
# Methods for checking cluster deployment.
def poll_cluster_status(self, cluster_id):
"""This method polls cluster status.
It polls cluster every <request_timeout> seconds for some timeout and
waits for when cluster gets to "Active" status.
"""
LOG.debug('Waiting for cluster to build and get to "Active" status...')
previous_cluster_status = 'An unknown cluster status'
start = time.time()
while time.time() - start < self.cluster_timeout:
cluster = self.sahara_client.clusters.get(cluster_id)
if cluster.status != previous_cluster_status:
LOG.debug('Currently cluster is '
'in "{0}" status.'.format(cluster.status))
previous_cluster_status = cluster.status
if cluster.status == 'Active':
return
if cluster.status == 'Error':
self.fail('Cluster failed to build and is in "Error" status.')
time.sleep(self.request_timeout)
self.fail('Cluster failed to get to "Active" '
'status within {0} seconds.'.format(self.cluster_timeout))
def check_hadoop_services(self, cluster_id, processes_map):
"""This method checks deployment of Hadoop services on cluster.
It checks whether all Hadoop processes are running on cluster nodes
or not.
"""
LOG.debug('Checking deployment of Hadoop services on cluster...')
node_ips_and_processes = self._get_node_ips_and_processes(cluster_id)
for node_ip, processes in node_ips_and_processes.items():
LOG.debug('Checking Hadoop processes '
'on node {0}...'.format(node_ip))
for process in processes:
if process in processes_map:
LOG.debug('Checking process "{0}"...'.format(process))
for port in processes_map[process]:
self._check_port(node_ip, port)
LOG.debug('Process "{0}" is running and listening '
'to port {1}.'.format(process, port))
LOG.debug('All Hadoop processes are '
'running on node {0}.'.format(node_ip))
LOG.debug(
'All Hadoop services have been successfully deployed on cluster.')
def _check_port(self, node_ip, port):
"""This method checks accessibility of specific port on cluster node.
It tries to establish connection to the process on specific port every
second for some timeout.
"""
start = time.time()
while time.time() - start < self.process_timeout:
cmd = ("timeout {0} bash -c 'telnet {1} {2}'".format(
self.request_timeout, node_ip, port))
output, output_err = self._run_ssh_cmd(cmd)
if 'Connected to {0}'.format(node_ip) in output:
return
time.sleep(self.request_timeout)
self.fail('Port {0} on node {1} is unreachable for '
'{2} seconds.'.format(port, node_ip, self.process_timeout))
def _get_node_ips_and_processes(self, cluster_id):
"""This method makes dictionary with information of cluster nodes.
Each key of dictionary is IP of cluster node, value is list of Hadoop
processes that must be started on node.
"""
data = self.sahara_client.clusters.get(cluster_id)
node_ips_and_processes = {}
for node_group in data.node_groups:
for instance in node_group['instances']:
node_ip = instance['management_ip']
node_ips_and_processes[node_ip] = node_group['node_processes']
return node_ips_and_processes
def check_node_access_via_ssh(self, cluster_id):
"""This method checks ability to log into cluster nodes via SSH."""
LOG.debug('Checking ability '
'to log into cluster nodes via SSH...')
cmd = ('ssh -i {0} '
'-oStrictHostKeyChecking=no -oUserKnownHostsFile=/dev/null {1}@'
.format(self.path_to_private_key, self.ssh_username))
for node_ip in self._get_node_ips_and_processes(cluster_id):
LOG.debug('Trying to log into node {0} via SSH...'.format(node_ip))
self._run_ssh_cmd(cmd + node_ip + ' ls -a')
LOG.debug('Node {0} is accessible via SSH.'.format(node_ip))
LOG.debug('All cluster nodes are accessible via SSH.')