Revert "Adds execution of post-upgrade validations on undercloud upgrade"

There is a race condition that makes the overcloud deployment
fail randomly. See the bug report.

Reverting for now because the gate is failing on it too
often and we don't have a proper solution.

Partial-Bug: #1713832
This reverts commit dd3398f214.

Change-Id: I18a55efc78b6dc5fcb83248961eee078cdd6e89d
(cherry picked from commit 5e4b286a55)
This commit is contained in:
Emilien Macchi 2017-08-29 22:43:54 +00:00
parent 1d8ad41db9
commit 2f67da1534
3 changed files with 36 additions and 159 deletions

View File

@ -18,7 +18,6 @@ import json
import os
import subprocess
import tempfile
import time
import fixtures
from keystoneauth1 import exceptions as ks_exceptions
@ -55,9 +54,7 @@ class TestUndercloud(BaseTestCase):
@mock.patch('instack_undercloud.undercloud._generate_environment')
@mock.patch('instack_undercloud.undercloud._load_config')
@mock.patch('instack_undercloud.undercloud._die_tuskar_die')
@mock.patch('instack_undercloud.undercloud._run_validation_groups')
def test_install(self, mock_run_validation_groups,
mock_die_tuskar_die, mock_load_config,
def test_install(self, mock_die_tuskar_die, mock_load_config,
mock_generate_environment, mock_run_instack,
mock_run_clean_all, mock_run_yum_update, mock_run_orc,
mock_post_config, mock_run_command,
@ -74,7 +71,6 @@ class TestUndercloud(BaseTestCase):
['sudo', 'rm', '-f', '/tmp/svc-map-services'], None, 'rm')
mock_upgrade_fact.assert_called_with(False)
mock_die_tuskar_die.assert_not_called()
mock_run_validation_groups.assert_not_called()
@mock.patch('instack_undercloud.undercloud._handle_upgrade_fact')
@mock.patch('instack_undercloud.undercloud._configure_logging')
@ -88,9 +84,7 @@ class TestUndercloud(BaseTestCase):
@mock.patch('instack_undercloud.undercloud._generate_environment')
@mock.patch('instack_undercloud.undercloud._load_config')
@mock.patch('instack_undercloud.undercloud._die_tuskar_die')
@mock.patch('instack_undercloud.undercloud._run_validation_groups')
def test_install_upgrade(self, mock_run_validation_groups,
mock_die_tuskar_die, mock_load_config,
def test_install_upgrade(self, mock_die_tuskar_die, mock_load_config,
mock_generate_environment, mock_run_instack,
mock_run_yum_clean_all, mock_run_yum_update,
mock_run_orc, mock_post_config, mock_run_command,
@ -107,7 +101,6 @@ class TestUndercloud(BaseTestCase):
['sudo', 'rm', '-f', '/tmp/svc-map-services'], None, 'rm')
mock_upgrade_fact.assert_called_with(True)
mock_die_tuskar_die.assert_called_once()
mock_run_validation_groups.assert_called_once()
@mock.patch('instack_undercloud.undercloud._configure_logging')
def test_install_exception(self, mock_configure_logging):
@ -747,67 +740,6 @@ class TestPostConfig(base.BaseTestCase):
mock_post_config_mistral.assert_called_once_with(
instack_env, mock_instance_mistral, mock_instance_swift)
@mock.patch('instack_undercloud.undercloud._get_auth_values')
@mock.patch('instack_undercloud.undercloud._get_session')
@mock.patch('mistralclient.api.client.client', autospec=True)
def test_run_validation_groups_success(self, mock_mistral_client,
mock_get_session,
mock_auth_values):
mock_mistral = mock.Mock()
mock_mistral_client.return_value = mock_mistral
mock_mistral.environments.list.return_value = []
mock_mistral.executions.get.return_value = mock.Mock(state="SUCCESS")
mock_get_session.return_value = mock.MagicMock()
undercloud._run_validation_groups(["post-upgrade"])
mock_mistral.executions.create.assert_called_once_with(
'tripleo.validations.v1.run_groups',
workflow_input={
'group_names': ['post-upgrade'],
}
)
@mock.patch('instack_undercloud.undercloud._get_auth_values')
@mock.patch('instack_undercloud.undercloud._get_session')
@mock.patch('mistralclient.api.client.client', autospec=True)
@mock.patch('time.strptime')
def test_run_validation_groups_fail(self, mock_strptime,
mock_mistral_client, mock_get_session,
mock_auth_values):
mock_mistral = mock.Mock()
mock_mistral_client.return_value = mock_mistral
mock_mistral.environments.list.return_value = []
mock_mistral.executions.get.return_value = mock.Mock(state="FAIL")
mock_mistral.executions.get_output.return_value = "ERROR!"
mock_mistral.executions.get.id = "1234"
mock_mistral.action_executions.list.return_value = []
mock_strptime.return_value = time.mktime(time.localtime())
mock_get_session.return_value = mock.MagicMock()
self.assertRaises(
RuntimeError, undercloud._run_validation_groups, ["post-upgrade"],
"", 360, True)
@mock.patch('instack_undercloud.undercloud._get_auth_values')
@mock.patch('instack_undercloud.undercloud._get_session')
@mock.patch('mistralclient.api.client.client', autospec=True)
@mock.patch('time.strptime')
def test_run_validation_groups_timeout(self, mock_strptime,
mock_mistral_client,
mock_get_session, mock_auth_values):
mock_mistral = mock.Mock()
mock_mistral_client.return_value = mock_mistral
mock_mistral.environments.list.return_value = []
mock_mistral.executions.get.id = "1234"
mock_mistral.action_executions.list.return_value = []
mock_get_session.return_value = mock.MagicMock()
mock_time = mock.MagicMock()
mock_time.return_value = time.mktime(time.localtime())
mock_strptime.return_value = time.mktime(time.localtime())
with mock.patch('time.time', mock_time):
self.assertRaisesRegexp(RuntimeError, ("TIMEOUT waiting for "
"execution"),
undercloud._run_validation_groups,
["post-upgrade"], "", -1, True)
def test_create_default_plan(self):
mock_mistral = mock.Mock()
mock_mistral.environments.list.return_value = []
@ -870,12 +802,10 @@ class TestPostConfig(base.BaseTestCase):
RuntimeError,
undercloud._create_default_plan, mock_mistral, [], timeout=0)
@mock.patch('time.strptime')
def test_create_default_plan_failed(self, mock_strptime):
def test_create_default_plan_failed(self):
mock_mistral = mock.Mock()
mock_mistral.executions.get.return_value = mock.Mock(state="ERROR")
mock_mistral.action_executions.list.return_value = []
mock_strptime.return_value = time.mktime(time.localtime())
self.assertRaises(
RuntimeError,
undercloud._create_default_plan, mock_mistral, [])

View File

@ -1488,72 +1488,6 @@ def _migrate_plans(mistral, swift, plans):
mistral.environments.delete(plan)
def _wait_for_mistral_execution(timeout_at, mistral, execution, message='',
fail_on_error=False):
while time.time() < timeout_at:
exe = mistral.executions.get(execution.id)
if exe.state == "RUNNING":
time.sleep(5)
continue
if exe.state == "SUCCESS":
return
else:
exe_out = ""
exe_created_at = time.strptime(exe.created_at,
"%Y-%m-%d %H:%M:%S")
ae_list = mistral.action_executions.list()
for ae in ae_list:
if ((ae.task_name == "run_validation") and
(ae.state == "ERROR") and
(time.strptime(ae.created_at, "%Y-%m-%d %H:%M:%S") >
exe_created_at)):
task = mistral.tasks.get(ae.task_execution_id)
task_res = task.to_dict().get('result')
exe_out = "%s %s" % (exe_out, task_res)
error_message = "ERROR %s %s Mistral execution ID: %s" % (
message, exe_out, execution.id)
LOG.error(error_message)
if fail_on_error:
raise RuntimeError(error_message)
return
else:
exe = mistral.executions.get(execution.id)
error_message = ("TIMEOUT waiting for execution %s to finish. "
"State: %s" % (exe.id, exe.state))
LOG.error(error_message)
if fail_on_error:
raise RuntimeError(error_message)
def _get_session():
user, password, project, auth_url = _get_auth_values()
auth_kwargs = {
'auth_url': auth_url,
'username': user,
'password': password,
'project_name': project,
'project_domain_name': 'Default',
'user_domain_name': 'Default',
}
auth_plugin = ks_auth.Password(**auth_kwargs)
return session.Session(auth=auth_plugin)
def _run_validation_groups(groups=[], mistral_url='', timeout=360,
fail_on_error=False):
sess = _get_session()
mistral = mistralclient.client(mistral_url=mistral_url, session=sess)
LOG.info('Starting and waiting for validation groups %s ', groups)
execution = mistral.executions.create(
'tripleo.validations.v1.run_groups',
workflow_input={'group_names': groups}
)
fail_message = ("error running the validation groups %s " % groups)
timeout_at = time.time() + timeout
_wait_for_mistral_execution(timeout_at, mistral, execution, fail_message,
fail_on_error)
def _create_default_plan(mistral, plans, timeout=360):
plan_name = 'overcloud'
queue_name = str(uuid.uuid4())
@ -1567,13 +1501,29 @@ def _create_default_plan(mistral, plans, timeout=360):
'tripleo.plan_management.v1.create_default_deployment_plan',
workflow_input={'container': plan_name, 'queue_name': queue_name}
)
timeout_at = time.time() + timeout
fail_message = ("error creating the default Deployment Plan %s "
"Check the create_default_deployment_plan execution "
"in Mistral with openstack workflow execution list " %
plan_name)
_wait_for_mistral_execution(timeout_at, mistral, execution, fail_message,
fail_on_error=True)
while time.time() < timeout_at:
exe = mistral.executions.get(execution.id)
if exe.state == "RUNNING":
time.sleep(5)
continue
if exe.state == "SUCCESS":
return
else:
raise RuntimeError(
"Failed to create the default Deployment Plan. Please check "
"the create_default_deployment_plan execution in Mistral with "
"`openstack workflow execution list`.")
else:
exe = mistral.executions.get(execution.id)
LOG.error("Timed out waiting for execution %s to finish. State: %s",
exe.id, exe.state)
raise RuntimeError(
"Timed out creating the default Deployment Plan. Please check "
"the create_default_deployment_plan execution in Mistral with "
"`openstack workflow execution list`.")
def _prepare_ssh_environment(mistral):
@ -1606,7 +1556,16 @@ def _post_config_mistral(instack_env, mistral, swift):
def _post_config(instack_env):
_copy_stackrc()
user, password, project, auth_url = _get_auth_values()
sess = _get_session()
auth_kwargs = {
'auth_url': auth_url,
'username': user,
'password': password,
'project_name': project,
'project_domain_name': 'Default',
'user_domain_name': 'Default',
}
auth_plugin = ks_auth.Password(**auth_kwargs)
sess = session.Session(auth=auth_plugin)
# TODO(andreykurilin): remove this check with support of novaclient 6.0.0
if nc.__version__[0] == "6":
nova = novaclient.Client(2, user, password, project, auth_url=auth_url)
@ -1724,9 +1683,6 @@ def install(instack_root, upgrade=False):
_run_orc(instack_env)
_post_config(instack_env)
_run_command(['sudo', 'rm', '-f', '/tmp/svc-map-services'], None, 'rm')
if upgrade and CONF.enable_validations: # Run post-upgrade validations
mistral_url = instack_env['UNDERCLOUD_ENDPOINT_MISTRAL_PUBLIC']
_run_validation_groups(["post-upgrade"], mistral_url)
except Exception as e:
LOG.debug("An exception occurred", exc_info=True)
LOG.error(FAILURE_MESSAGE,

View File

@ -1,9 +0,0 @@
---
upgrade:
- Wires up execution of the "post-upgrade" group of tripleo-validations to
sanity check the undercloud. The validations are executed at the
very end of the process, after the undercloud has been fully upgraded
and all services started in the upgraded versions. If there is an error it
is logged but not raised so these validations will not fail the upgrade.
The operator can set the existing 'enable_validations' to false to skip
these validations.