Check tasks status after cluster deployment

1. add FuelWebClient.assert_all_tasks_completed, which checks task status
    for a specified cluster or for all clusters
2. add NailgunClient.get_all_tasks_list for retrieving all tasks info from database
3. call the task status check after a successful deployment, before the short OSTF checks (
    self.assert_ha_services_ready(cluster_id)
    self.assert_os_services_ready(cluster_id)
    )
4. add TestCliBase.assert_all_tasks_completed
5. call TestCliBase.assert_all_tasks_completed after deployment

Change-Id: I428fc17434d40951c6eb6cefe88dd007d1eb0761
Closes-bug: #1564943
This commit is contained in:
Alexey Stepanov 2016-04-05 14:29:19 +03:00
parent 6c1a7fb92d
commit 41071ba6b9
5 changed files with 232 additions and 5 deletions

View File

@ -1299,3 +1299,78 @@ def check_snapshot_logs(ip, snapshot_name, controller_fqdns, compute_fqdns):
logger.debug("missed logs are {}".format(absent_logs))
assert_false(absent_logs, "Next logs aren't present"
" in snapshot logs {}".format(absent_logs))
def incomplete_tasks(tasks, cluster_id=None):
    """Split task records into unfinished tasks and deployment task ids.

    For every ``(cluster, name)`` pair only the first record met while
    iterating ``tasks`` is inspected; later records with the same pair are
    ignored. Callers that want the most recent record to win therefore
    have to pass ``tasks`` ordered newest first.

    :param tasks: iterable of dicts with at least the keys ``cluster``,
        ``name``, ``status`` and ``id``
    :param cluster_id: when not None, records whose ``cluster`` differs
        from this value are skipped
    :return: tuple ``(not_ready_tasks, deploy_tasks)`` where
        ``not_ready_tasks`` maps cluster -> list of records whose status is
        neither 'ready' nor 'skipped', and ``deploy_tasks`` maps
        cluster -> id of the selected 'deployment' record
    """
    allowed_statuses = {'ready', 'skipped'}

    # Keep only the first record seen for each (cluster, name) pair.
    selected = {}
    for record in tasks:
        if cluster_id is not None and record['cluster'] != cluster_id:
            continue
        selected.setdefault((record['cluster'], record['name']), record)

    not_ready_tasks = {}
    deploy_tasks = {}
    for (cluster, name), record in selected.items():
        if name == 'deployment':
            deploy_tasks[cluster] = record['id']
        if record['status'] in allowed_statuses:
            continue
        not_ready_tasks.setdefault(cluster, []).append(record)
    return not_ready_tasks, deploy_tasks
def incomplete_deploy(deployment_tasks):
    """Collect unfinished deployment history records per cluster and node.

    :param deployment_tasks: dict mapping cluster id -> iterable of history
        records, each a dict with at least ``status`` and ``node_id``
    :return: dict mapping cluster id -> {node_id: [records]} holding only
        records whose status is neither 'ready' nor 'skipped'; clusters
        without such records are omitted
    """
    allowed_statuses = {'ready', 'skipped'}
    not_ready_deploy = {}
    for cluster_id, history in deployment_tasks.items():
        jobs_by_node = {}
        for job in history:
            if job['status'] in allowed_statuses:
                continue
            jobs_by_node.setdefault(job['node_id'], []).append(job)
        # Only report clusters that actually have unfinished jobs.
        if jobs_by_node:
            not_ready_deploy[cluster_id] = jobs_by_node
    return not_ready_deploy
def fail_deploy(not_ready_transactions):
    """Log and assert on unfinished deployment tasks.

    Does nothing when ``not_ready_transactions`` is empty. Otherwise a
    report grouped by cluster and node is built, logged via
    ``logger.error`` and passed as the message of an ``assert_true`` call
    whose condition is false in that situation.

    :param not_ready_transactions: dict mapping cluster -> {node_id:
        [records]}; every record must provide the keys
        ``deployment_graph_task_name``, ``status``, ``time_start`` and
        ``time_end`` used by the report template
    """
    if len(not_ready_transactions) == 0:
        return

    cluster_info_template = "\n\tCluster ID: {cluster}{info}\n"
    node_info_template = "\n\t\tNode: {node_id}{details}\n"
    task_details_template = (
        "\n"
        "\t\t\tTask name: {deployment_graph_task_name}\n"
        "\t\t\t\tStatus: {status}\n"
        "\t\t\t\tStart: {time_start}\n"
        "\t\t\t\tEnd: {time_end}\n"
    )

    cluster_chunks = []
    for cluster, records in sorted(not_ready_transactions.items()):
        node_chunks = []
        for node_id, tasks in sorted(records.items()):
            # Group records with the same status together in the report.
            by_status = sorted(tasks, key=lambda item: item['status'])
            details = "".join(
                task_details_template.format(**task) for task in by_status)
            node_chunks.append(
                node_info_template.format(node_id=node_id, details=details))
        cluster_chunks.append(
            cluster_info_template.format(
                cluster=cluster, info="".join(node_chunks)))

    failure_text = 'Not all deployments tasks completed: {}'.format(
        ''.join(cluster_chunks))
    logger.error(failure_text)
    assert_true(len(not_ready_transactions) == 0, failure_text)

View File

@ -342,6 +342,51 @@ class FuelWebClient(object):
)
)
@logwrap
def assert_all_tasks_completed(self, cluster_id=None):
    """Assert that the selected tasks and deployment jobs are finished.

    All tasks are fetched through ``self.client``, ordered by ``id``
    descending and handed to ``checkers.incomplete_tasks``. For every
    deployment task it returns, the deployment history is fetched and
    filtered with ``checkers.incomplete_deploy``.

    Unfinished tasks are logged. The assertion on them is raised here only
    when no unfinished deployment history records exist; otherwise
    ``checkers.fail_deploy`` is left to report the failure in detail.

    :param cluster_id: restrict the check to this cluster; None checks
        tasks of every cluster
    """
    newest_first = sorted(
        self.client.get_all_tasks_list(),
        key=lambda item: item['id'],
        reverse=True
    )
    not_ready_tasks, deploy_tasks = checkers.incomplete_tasks(
        newest_first, cluster_id)

    history_by_cluster = {}
    for cluster, task_id in deploy_tasks.items():
        history_by_cluster[cluster] = \
            self.client.get_deployment_task_hist(task_id)
    not_ready_transactions = checkers.incomplete_deploy(history_by_cluster)

    if len(not_ready_tasks) > 0:
        cluster_info_template = "\n\tCluster ID: {cluster}{info}\n"
        task_details_template = (
            "\n"
            "\t\tTask name: {name}\n"
            "\t\t\tStatus: {status}\n"
            "\t\t\tProgress: {progress}\n"
            "\t\t\tResult: {result}\n"
            "\t\t\tMessage: {message}\n"
            "\t\t\tTask ID: {id}"
        )
        cluster_chunks = []
        for cluster, tasks in sorted(not_ready_tasks.items()):
            info = "".join(
                task_details_template.format(**task) for task in tasks)
            cluster_chunks.append(
                cluster_info_template.format(cluster=cluster, info=info))
        task_text = 'Not all tasks completed: {}'.format(
            ''.join(cluster_chunks))
        logger.error(task_text)
        if len(not_ready_transactions) == 0:
            # Else: we will raise assert with detailed info
            # about deployment
            assert_true(len(not_ready_tasks) == 0, task_text)
    checkers.fail_deploy(not_ready_transactions)
@logwrap
def fqdn(self, devops_node):
logger.info('Get FQDN of a devops node %s', devops_node.name)
@ -783,7 +828,10 @@ class FuelWebClient(object):
attributes = self.client.get_cluster_attributes(cluster_id)
return attributes['editable']['repo_setup']['repos']
def check_deploy_state(self, cluster_id, check_services=True):
def check_deploy_state(self, cluster_id, check_services=True,
check_tasks=True):
if check_tasks:
self.assert_all_tasks_completed(cluster_id=cluster_id)
if check_services:
self.assert_ha_services_ready(cluster_id)
self.assert_os_services_ready(cluster_id)
@ -825,12 +873,12 @@ class FuelWebClient(object):
@custom_repo
def deploy_cluster_wait(self, cluster_id, is_feature=False,
timeout=help_data.DEPLOYMENT_TIMEOUT, interval=30,
check_services=True):
check_services=True, check_tasks=True):
if not is_feature and help_data.DEPLOYMENT_RETRIES == 1:
logger.info('Deploy cluster %s', cluster_id)
task = self.deploy_cluster(cluster_id)
self.assert_task_success(task, interval=interval, timeout=timeout)
self.check_deploy_state(cluster_id, check_services)
self.check_deploy_state(cluster_id, check_services, check_tasks)
return
logger.info('Provision nodes of a cluster %s', cluster_id)
@ -842,7 +890,7 @@ class FuelWebClient(object):
cluster_id, str(retry_number + 1))
task = self.client.deploy_nodes(cluster_id)
self.assert_task_success(task, timeout=timeout, interval=interval)
self.check_deploy_state(cluster_id, check_services)
self.check_deploy_state(cluster_id, check_services, check_tasks)
def deploy_cluster_wait_progress(self, cluster_id, progress,
return_task=None):

View File

@ -688,3 +688,16 @@ class NailgunClient(object):
"""
url = '/api/v1/nodes/{}/attributes/'.format(node_id)
return self.client.get(url)
@logwrap
@json_parse
def get_all_tasks_list(self):
    """Request the '/api/transactions/' endpoint and return the response.

    The result is post-processed by the ``json_parse`` decorator.
    """
    return self.client.get('/api/transactions/')
@logwrap
@json_parse
def get_deployment_task_hist(self, task_id):
    """Request the deployment history of one transaction.

    The result is post-processed by the ``json_parse`` decorator.

    :param task_id: id substituted into the
        '/api/transactions/<task_id>/deployment_history' URL
    """
    endpoint = '/api/transactions/{task_id}/deployment_history'
    return self.client.get(endpoint.format(task_id=task_id))

View File

@ -194,6 +194,8 @@ class CommandLineTest(test_cli_base.CommandLine):
cmd,
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=60 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
# Deploy the compute nodes
cmd = ('fuel --env-id={0} node --deploy --node {1},{2} --json'.format(
cluster_id, node_ids[1], node_ids[2]))
@ -201,6 +203,8 @@ class CommandLineTest(test_cli_base.CommandLine):
cmd,
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=30 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
# Verify networks
self.fuel_web.verify_network(cluster_id)
controller_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
@ -430,6 +434,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=60 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
assert_equal(
1,
len(self.fuel_web.get_nailgun_node_by_status('provisioned')),
@ -451,6 +457,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=10 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
assert_equal(
2,
len(self.fuel_web.get_nailgun_node_by_status('provisioned')),
@ -472,6 +480,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=10 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
assert_equal(
3,
len(self.fuel_web.get_nailgun_node_by_status('provisioned')),
@ -493,6 +503,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=10 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
assert_equal(
4,
len(self.fuel_web.get_nailgun_node_by_status('provisioned')),
@ -517,6 +529,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=10 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
assert_equal(
6,
len(self.fuel_web.get_nailgun_node_by_status('provisioned')),
@ -537,6 +551,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=10 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
assert_equal(
7,
len(self.fuel_web.get_nailgun_node_by_status('provisioned')),
@ -572,6 +588,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=60 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
self.show_step(13, details='for node id {}'.format(node_ids[1]))
# Deploy the compute node node_ids[1]
cmd = ('fuel --env-id={0} node --deploy --node {1} --json'.format(
@ -581,6 +599,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=30 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
# Deploy the cinder node node_ids[2]
self.show_step(14, details='for node id {}'.format(node_ids[2]))
cmd = ('fuel --env-id={0} node --deploy --node {1} --json'.format(
@ -590,6 +610,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=60 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
# Deploy the mongo node node_ids[3]
self.show_step(15, details='for node id {}'.format(node_ids[3]))
cmd = ('fuel --env-id={0} node --deploy --node {1} --json'.format(
@ -599,6 +621,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=60 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
# Deploy ceph-osd nodes node_ids[4] node_ids[5]
self.show_step(16, details='for node ids {0}, {1}'.format(
node_ids[4], node_ids[5]))
@ -609,6 +633,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=60 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
# Deploy the base-os node node_ids[6]
self.show_step(17, details='for node id {}'.format(node_ids[6]))
cmd = ('fuel --env-id={0} node --deploy --node {1} --json'.format(
@ -618,6 +644,8 @@ class CommandLineTest(test_cli_base.CommandLine):
jsonify=True)['stdout_json']
self.assert_cli_task_success(task, timeout=60 * 60)
self.assert_all_tasks_completed(cluster_id=cluster_id)
self.fuel_web.verify_network(cluster_id)
self.show_step(18)
node_discover_after_deploy = self.fuel_web.get_nailgun_node_by_status(

View File

@ -12,9 +12,11 @@
# License for the specific language governing permissions and limitations
# under the License.
import time
import json
import time
from proboscis.asserts import assert_equal
from proboscis.asserts import assert_true
from devops.error import TimeoutError
from devops.helpers.helpers import wait
@ -22,6 +24,9 @@ from devops.helpers.helpers import wait
from six.moves import urllib
# pylint: enable=import-error
from fuelweb_test.helpers.checkers import fail_deploy
from fuelweb_test.helpers.checkers import incomplete_deploy
from fuelweb_test.helpers.checkers import incomplete_tasks
from fuelweb_test.helpers.ssl_helpers import change_cluster_ssl_config
from fuelweb_test.tests.base_test_case import TestBasic
from fuelweb_test import logwrap
@ -107,6 +112,64 @@ class CommandLine(TestBasic):
)
)
@logwrap
def get_all_tasks_list(self):
    """Run 'fuel2 task list -f json' remotely and return the parsed output.

    The command is executed through ``self.ssh_manager`` on the host at
    ``self.ssh_manager.admin_ip``; the 'stdout_json' entry of the result
    is returned.
    """
    result = self.ssh_manager.execute_on_remote(
        ip=self.ssh_manager.admin_ip,
        cmd='fuel2 task list -f json',
        jsonify=True)
    return result['stdout_json']
@logwrap
def get_deployment_task_hist(self, task_id):
    """Run 'fuel2 task history show <task_id> -f json' remotely.

    The command is executed through ``self.ssh_manager`` on the host at
    ``self.ssh_manager.admin_ip``.

    :param task_id: id of the task whose history is requested
    :return: the 'stdout_json' entry of the command result
    """
    command = 'fuel2 task history show {} -f json'.format(task_id)
    result = self.ssh_manager.execute_on_remote(
        ip=self.ssh_manager.admin_ip,
        cmd=command,
        jsonify=True
    )
    return result['stdout_json']
@logwrap
def assert_all_tasks_completed(self, cluster_id=None):
    """Assert that the selected tasks and deployment jobs are finished.

    Tasks come from ``self.get_all_tasks_list()``, are ordered by ``id``
    descending and handed to ``incomplete_tasks``. For every deployment
    task it returns, the history is read with
    ``self.get_deployment_task_hist`` and filtered by
    ``incomplete_deploy``.

    Unfinished tasks are logged. The assertion on them is raised here only
    when no unfinished deployment history records exist; otherwise
    ``fail_deploy`` is left to report the failure in detail.

    :param cluster_id: restrict the check to this cluster; None checks
        tasks of every cluster
    """
    newest_first = sorted(
        self.get_all_tasks_list(),
        key=lambda item: item['id'],
        reverse=True
    )
    not_ready_tasks, deploy_tasks = incomplete_tasks(
        newest_first, cluster_id)

    history_by_cluster = {}
    for cluster, task_id in deploy_tasks.items():
        history_by_cluster[cluster] = self.get_deployment_task_hist(task_id)
    not_ready_transactions = incomplete_deploy(history_by_cluster)

    if len(not_ready_tasks) > 0:
        cluster_info_template = "\n\tCluster ID: {cluster}{info}\n"
        task_details_template = (
            "\n"
            "\t\tTask name: {name}\n"
            "\t\t\tStatus: {status}\n"
            "\t\t\tProgress: {progress}\n"
            "\t\t\tResult: {result}\n"
            "\t\t\tTask ID: {id}"
        )
        cluster_chunks = []
        for cluster, tasks in sorted(not_ready_tasks.items()):
            info = "".join(
                task_details_template.format(**task) for task in tasks)
            cluster_chunks.append(
                cluster_info_template.format(cluster=cluster, info=info))
        task_text = 'Not all tasks completed: {}'.format(
            ''.join(cluster_chunks))
        logger.error(task_text)
        if len(not_ready_transactions) == 0:
            # Else: we will raise assert with detailed info
            # about deployment
            assert_true(len(not_ready_tasks) == 0, task_text)
    fail_deploy(not_ready_transactions)
@staticmethod
@logwrap
def hiera_floating_ranges(node_ip):