Don't try to stop a stack on a dead engine
Check that an engine is still alive before trying to stop the stack during a stack-delete. Change-Id: I3ce87dcefd5c4b5c542c3dc0d288655433bf69c4 Closes-Bug: #1279010
This commit is contained in:
parent
2a2cf5231d
commit
de5feffa6a
|
@ -577,31 +577,33 @@ class EngineService(service.Service):
|
|||
lock = stack_lock.StackLock(cnxt, stack, self.engine_id)
|
||||
acquire_result = lock.try_acquire()
|
||||
|
||||
# Successfully acquired lock
|
||||
if acquire_result is None:
|
||||
self.thread_group_mgr.start_with_acquired_lock(stack, lock,
|
||||
stack.delete)
|
||||
return
|
||||
|
||||
elif acquire_result == self.engine_id: # Current engine has the lock
|
||||
# Current engine has the lock
|
||||
elif acquire_result == self.engine_id:
|
||||
self.thread_group_mgr.stop(stack.id)
|
||||
|
||||
# If the lock isn't released here, then the call to
|
||||
# start_with_lock below will raise an ActionInProgress
|
||||
# exception. Ideally, we wouldn't be calling another
|
||||
# release() here, since it should be called as soon as the
|
||||
# ThreadGroup is stopped. But apparently there's a race
|
||||
# between release() the next call to lock.acquire().
|
||||
db_api.stack_lock_release(stack.id, self.engine_id)
|
||||
|
||||
else: # Another engine has the lock
|
||||
other_engine_id = acquire_result
|
||||
stop_result = remote_stop(other_engine_id)
|
||||
# Another active engine has the lock
|
||||
elif stack_lock.StackLock.engine_alive(cnxt, acquire_result):
|
||||
stop_result = remote_stop(acquire_result)
|
||||
if stop_result is None:
|
||||
logger.debug(_("Successfully stopped remote task on engine %s")
|
||||
% other_engine_id)
|
||||
% acquire_result)
|
||||
else:
|
||||
raise exception.StopActionFailed(stack_name=stack.name,
|
||||
engine_id=other_engine_id)
|
||||
engine_id=acquire_result)
|
||||
|
||||
# If the lock isn't released here, then the call to
|
||||
# start_with_lock below will raise an ActionInProgress
|
||||
# exception. Ideally, we wouldn't be calling another
|
||||
# release() here, since it should be called as soon as the
|
||||
# ThreadGroup is stopped. But apparently there's a race
|
||||
# between release() the next call to lock.acquire().
|
||||
db_api.stack_lock_release(stack.id, acquire_result)
|
||||
|
||||
# There may be additional resources that we don't know about
|
||||
# if an update was in-progress when the stack was stopped, so
|
||||
|
|
|
@ -34,12 +34,13 @@ class StackLock(object):
|
|||
self.engine_id = engine_id
|
||||
self.listener = None
|
||||
|
||||
def _engine_alive(self, engine_id):
|
||||
@staticmethod
|
||||
def engine_alive(context, engine_id):
|
||||
topic = engine_id
|
||||
rpc = proxy.RpcProxy(topic, "1.0")
|
||||
msg = rpc.make_msg("listening")
|
||||
try:
|
||||
return rpc.call(self.context, msg, topic=topic,
|
||||
return rpc.call(context, msg, topic=topic,
|
||||
timeout=cfg.CONF.engine_life_check_timeout)
|
||||
except rpc_common.Timeout:
|
||||
return False
|
||||
|
@ -72,7 +73,7 @@ class StackLock(object):
|
|||
return
|
||||
|
||||
if lock_engine_id == self.engine_id or \
|
||||
self._engine_alive(lock_engine_id):
|
||||
self.engine_alive(self.context, lock_engine_id):
|
||||
logger.debug(_("Lock on stack %(stack)s is owned by engine "
|
||||
"%(engine)s") % {'stack': self.stack.id,
|
||||
'engine': lock_engine_id})
|
||||
|
|
|
@ -728,6 +728,10 @@ class StackServiceCreateUpdateDeleteTest(HeatTestCase):
|
|||
self.m.StubOutWithMock(stack_lock.StackLock, 'try_acquire')
|
||||
stack_lock.StackLock.try_acquire().AndReturn("other-engine-fake-uuid")
|
||||
|
||||
self.m.StubOutWithMock(stack_lock.StackLock, 'engine_alive')
|
||||
stack_lock.StackLock.engine_alive(self.ctx, "other-engine-fake-uuid")\
|
||||
.AndReturn(True)
|
||||
|
||||
rpc = proxy.RpcProxy("other-engine-fake-uuid", "1.0")
|
||||
msg = rpc.make_msg("stop_stack", stack_identity=mox.IgnoreArg())
|
||||
self.m.StubOutWithMock(proxy.RpcProxy, 'call')
|
||||
|
@ -755,6 +759,10 @@ class StackServiceCreateUpdateDeleteTest(HeatTestCase):
|
|||
self.m.StubOutWithMock(stack_lock.StackLock, 'try_acquire')
|
||||
stack_lock.StackLock.try_acquire().AndReturn("other-engine-fake-uuid")
|
||||
|
||||
self.m.StubOutWithMock(stack_lock.StackLock, 'engine_alive')
|
||||
stack_lock.StackLock.engine_alive(self.ctx, "other-engine-fake-uuid")\
|
||||
.AndReturn(True)
|
||||
|
||||
rpc = proxy.RpcProxy("other-engine-fake-uuid", "1.0")
|
||||
msg = rpc.make_msg("stop_stack", stack_identity=mox.IgnoreArg())
|
||||
self.m.StubOutWithMock(proxy.RpcProxy, 'call')
|
||||
|
@ -769,6 +777,32 @@ class StackServiceCreateUpdateDeleteTest(HeatTestCase):
|
|||
self.assertIsNone(self.man.delete_stack(self.ctx, stack.identifier()))
|
||||
self.m.VerifyAll()
|
||||
|
||||
def test_stack_delete_other_dead_engine_active_lock(self):
|
||||
stack_name = 'service_delete_test_stack'
|
||||
stack = get_wordpress_stack(stack_name, self.ctx)
|
||||
sid = stack.store()
|
||||
|
||||
# Insert a fake lock into the db
|
||||
db_api.stack_lock_create(stack.id, "other-engine-fake-uuid")
|
||||
|
||||
st = db_api.stack_get(self.ctx, sid)
|
||||
self.m.StubOutWithMock(parser.Stack, 'load')
|
||||
parser.Stack.load(self.ctx, stack=st).MultipleTimes().AndReturn(stack)
|
||||
|
||||
self.m.StubOutWithMock(stack_lock.StackLock, 'try_acquire')
|
||||
stack_lock.StackLock.try_acquire().AndReturn("other-engine-fake-uuid")
|
||||
|
||||
self.m.StubOutWithMock(stack_lock.StackLock, 'engine_alive')
|
||||
stack_lock.StackLock.engine_alive(self.ctx, "other-engine-fake-uuid")\
|
||||
.AndReturn(False)
|
||||
|
||||
self.m.StubOutWithMock(stack_lock.StackLock, 'acquire')
|
||||
stack_lock.StackLock.acquire().AndReturn(None)
|
||||
self.m.ReplayAll()
|
||||
|
||||
self.assertIsNone(self.man.delete_stack(self.ctx, stack.identifier()))
|
||||
self.m.VerifyAll()
|
||||
|
||||
def test_stack_update(self):
|
||||
stack_name = 'service_update_test_stack'
|
||||
params = {'foo': 'bar'}
|
||||
|
|
Loading…
Reference in New Issue