Increase the timeout for the stop_stack message

Previously, the stop_stack message accidentally used the
engine_life_check_timeout (by default, 2s). But unlike other messages sent
using that timeout, stop_stack needs to synchronously kill all running
threads operating on the stack. For a very large stack, this can easily
take much longer than a couple of seconds. This patch increases the timeout
for stop_stack to 30s to give a better chance of being able to start the
delete.

Change-Id: I4b36ed7f1025b6439aeab63d71041bb2000363a0
Closes-Bug: #1499669
(cherry picked from commit e56fc689e1)
This commit is contained in:
Zane Bitter 2016-09-22 09:44:56 -04:00
parent e4c4c56464
commit ee86435e44
3 changed files with 57 additions and 8 deletions

View File

@ -79,6 +79,10 @@ cfg.CONF.import_opt('enable_stack_abandon', 'heat.common.config')
cfg.CONF.import_opt('enable_stack_adopt', 'heat.common.config')
cfg.CONF.import_opt('convergence_engine', 'heat.common.config')
# Time (in seconds) to wait for a remote engine to stop a stack when
# cancelling its running threads, before giving up on being able to start a
# delete. Used as the RPC timeout for the stop_stack message instead of the
# much shorter engine_life_check_timeout, because stopping a large stack has
# to synchronously kill every thread operating on it.
STOP_STACK_TIMEOUT = 30
LOG = logging.getLogger(__name__)
@ -1147,7 +1151,8 @@ class EngineService(service.Service):
# Another active engine has the lock
elif service_utils.engine_alive(cnxt, engine_id):
cancel_result = self._remote_call(
cnxt, engine_id, self.listener.SEND,
cnxt, engine_id, cfg.CONF.engine_life_check_timeout,
self.listener.SEND,
stack_identity=stack_identity, message=cancel_message)
if cancel_result is None:
LOG.debug("Successfully sent %(msg)s message "
@ -1337,8 +1342,7 @@ class EngineService(service.Service):
return api.format_stack_output(outputs[output_key])
def _remote_call(self, cnxt, lock_engine_id, call, **kwargs):
timeout = cfg.CONF.engine_life_check_timeout
def _remote_call(self, cnxt, lock_engine_id, timeout, call, **kwargs):
self.cctxt = self._client.prepare(
version='1.0',
timeout=timeout,
@ -1396,7 +1400,8 @@ class EngineService(service.Service):
# Another active engine has the lock
elif service_utils.engine_alive(cnxt, acquire_result):
cancel_result = self._remote_call(
cnxt, acquire_result, self.listener.SEND,
cnxt, acquire_result, cfg.CONF.engine_life_check_timeout,
self.listener.SEND,
stack_identity=stack_identity, message=rpc_api.THREAD_CANCEL)
if cancel_result is None:
LOG.debug("Successfully sent %(msg)s message "
@ -1436,7 +1441,8 @@ class EngineService(service.Service):
elif service_utils.engine_alive(cnxt, acquire_result):
# Another active engine has the lock
stop_result = self._remote_call(
cnxt, acquire_result, self.listener.STOP_STACK,
cnxt, acquire_result, STOP_STACK_TIMEOUT,
self.listener.STOP_STACK,
stack_identity=stack_identity)
if stop_result is None:
LOG.debug("Successfully stopped remote task "

View File

@ -165,7 +165,8 @@ class StackDeleteTest(common.HeatTestCase):
mock_load.assert_called_once_with(self.ctx, stack=st)
mock_try.assert_called_once_with()
mock_alive.assert_called_once_with(self.ctx, OTHER_ENGINE)
mock_call.assert_called_once_with(self.ctx, OTHER_ENGINE, "send",
mock_call.assert_called_once_with(self.ctx, OTHER_ENGINE, mock.ANY,
"send",
message='cancel',
stack_identity=mock.ANY)
@ -203,10 +204,10 @@ class StackDeleteTest(common.HeatTestCase):
mock_try.assert_called_with()
mock_alive.assert_called_with(self.ctx, OTHER_ENGINE)
mock_call.assert_has_calls([
mock.call(self.ctx, OTHER_ENGINE, "send",
mock.call(self.ctx, OTHER_ENGINE, mock.ANY, "send",
message='cancel',
stack_identity=mock.ANY),
mock.call(self.ctx, OTHER_ENGINE, "stop_stack",
mock.call(self.ctx, OTHER_ENGINE, mock.ANY, "stop_stack",
stack_identity=mock.ANY)
])
mock_acquire.assert_called_once_with(True)

View File

@ -0,0 +1,42 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import time
from heat_integrationtests.functional import functional_base
class DeleteInProgressTest(functional_base.FunctionalTestsBase):
    """Functional test: delete a large nested stack while it is being created.

    Exercises the delete path for a stack whose creation is still in
    progress, using a resource group large enough that many nested stacks
    are involved.
    """

    # Parent template: a resource group of 125 members, each of which is a
    # nested stack built from ``empty.yaml``. The YAML nesting is significant:
    # ``rg`` must sit under ``resources`` and ``count``/``resource_def`` under
    # ``properties`` for this to be a valid ResourceGroup definition.
    root_template = '''
heat_template_version: 2013-05-23
resources:
  rg:
    type: OS::Heat::ResourceGroup
    properties:
      count: 125
      resource_def:
        type: empty.yaml
'''

    # Nested template with no resources, so each group member is cheap to
    # create and only the sheer number of nested stacks matters.
    empty_template = '''
heat_template_version: 2013-05-23
resources:
'''

    def test_delete_nested_stacks_create_in_progress(self):
        """Start creating the group, wait, then delete it mid-creation."""
        files = {'empty.yaml': self.empty_template}
        # Return as soon as the stack reports CREATE_IN_PROGRESS rather than
        # waiting for creation to finish.
        identifier = self.stack_create(template=self.root_template,
                                       files=files,
                                       expected_status='CREATE_IN_PROGRESS')
        # NOTE(review): presumably this pause lets creation of the nested
        # stacks get well underway, so the delete has running work to stop --
        # confirm the intended duration against the CI environment.
        time.sleep(20)
        self._stack_delete(identifier)