Fix a concurrency issue when locking reprocessing tasks

It was discovered that in some situations the same reprocessing task
could be processed simultaneously by different workers, which leads to
unnecessary duplicated processing. This happened because
"current_reprocess_time" was part of the lock name, so workers could
compute different lock names for the same scope. For instance, when a
worker starts processing a brand new reprocessing task and, after
reprocessing a few time frames, updates "current_reprocess_time",
other workers that reach the locking point later derive a different
lock name for the same scope ID and end up reprocessing a scope that
is already being reprocessed.
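
Illustration (not part of the commit): a minimal, self-contained Python
sketch of the race described above. All names in it (Scope, lock_registry,
old_lock_name, new_lock_name) are hypothetical and do not come from the
CloudKitty code base; it only shows how a lock name that embeds the mutable
"current_reprocess_time" lets a second worker grab a different lock for the
same scope, while a name built from immutable attributes stays stable.

    # Minimal sketch of the locking issue; all names here are hypothetical
    # and are not CloudKitty code.
    import threading
    from dataclasses import dataclass


    @dataclass
    class Scope:
        identifier: str
        start_reprocess_time: str
        end_reprocess_time: str
        current_reprocess_time: str


    # One lock per distinct name: two workers only exclude each other when
    # they compute the same lock name for the same scope.
    lock_registry = {}


    def get_lock(name):
        return lock_registry.setdefault(name, threading.Lock())


    def old_lock_name(scope):
        # Before the fix: the name embeds "current_reprocess_time", which is
        # updated while reprocessing runs.
        return "worker-id=%s-start=%s-end=%s-current=%s" % (
            scope.identifier, scope.start_reprocess_time,
            scope.end_reprocess_time, scope.current_reprocess_time)


    def new_lock_name(scope):
        # After the fix: only attributes that never change during
        # reprocessing are used, so every worker derives the same name.
        return "worker-id=%s-start=%s-end=%s" % (
            scope.identifier, scope.start_reprocess_time,
            scope.end_reprocess_time)


    scope = Scope("scope-1", "2023-01-01", "2023-01-31", "2023-01-01")

    # Worker A acquires the lock and starts reprocessing.
    get_lock(old_lock_name(scope)).acquire()

    # A finishes a few time frames and advances the pointer.
    scope.current_reprocess_time = "2023-01-05"

    # Worker B now computes a different name, acquires a different lock, and
    # reprocesses the same scope concurrently.
    print(get_lock(old_lock_name(scope)).acquire(blocking=False))  # True

    # With the fixed naming, both workers resolve to the same lock object.
    print(get_lock(new_lock_name(scope)) is get_lock(new_lock_name(scope)))  # True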

Change-Id: I487d0eeb1cedc162d44f8c879a27f924b5c76206
Rafael Weingärtner 2023-07-12 07:48:47 -03:00
parent 60077a3cc4
commit 0451df0d1b
3 changed files with 18 additions and 10 deletions

@@ -614,13 +614,17 @@ class CloudKittyProcessor(cotyledon.Service):
             lock_name, lock = get_lock(
                 self.coord, self.generate_lock_base_name(tenant_id))
-            LOG.debug('[Worker: {w}] Trying to acquire lock "{lock_name}".'
-                      .format(w=self._worker_id, lock_name=lock_name))
+            LOG.debug('[Worker: {w}] Trying to acquire lock "{lock_name}" for '
+                      'scope ID {scope_id}.'.format(w=self._worker_id,
+                                                    lock_name=lock_name,
+                                                    scope_id=tenant_id))
             lock_acquired = lock.acquire(blocking=False)
             if lock_acquired:
-                LOG.debug('[Worker: {w}] Acquired lock "{lock_name}".'.format(
-                    w=self._worker_id, lock_name=lock_name))
+                LOG.debug('[Worker: {w}] Acquired lock "{lock_name}" for '
+                          'scope ID {scope_id}.'.format(w=self._worker_id,
+                                                        lock_name=lock_name,
+                                                        scope_id=tenant_id))
                 try:
                     self.process_scope(tenant_id)
@@ -702,9 +706,10 @@ class CloudKittyReprocessor(CloudKittyProcessor):
                   self._worker_id, len(self.tenants))
 
     def generate_lock_base_name(self, scope):
-        return "%s-id=%s-start=%s-end=%s-current=%s" % (
-            self.worker_class, scope.identifier, scope.start_reprocess_time,
-            scope.end_reprocess_time, scope.current_reprocess_time)
+        return "%s-id=%s-start=%s-end=%s" % (self.worker_class,
+                                             scope.identifier,
+                                             scope.start_reprocess_time,
+                                             scope.end_reprocess_time)
 
 
 class CloudKittyServiceManager(cotyledon.ServiceManager):

@@ -623,10 +623,9 @@ class CloudKittyReprocessorTest(tests.TestCase):
         expected_lock_name = "<class 'cloudkitty.orchestrator." \
                              "ReprocessingWorker'>-id=scope_identifier-" \
-                             "start=%s-end=%s-current=%s" % (
+                             "start=%s-end=%s" % (
                                  scope_mock.start_reprocess_time,
-                                 scope_mock.end_reprocess_time,
-                                 scope_mock.current_reprocess_time)
+                                 scope_mock.end_reprocess_time)
 
         self.assertEqual(expected_lock_name, return_generate_lock_name)

@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    Fixed concurrency issues during reprocessing tasks.