Fix leaked upload cleanup
The cleanup routine for leaked image uploads based its detection on upload ids, but they are not unique except in the context of a provider and build. This meant that, for example, as long as there was an upload with id 0000000001 for any image build for the provider (very likely!) we would skip cleaning up any leaked uploads with id 0000000001. Correct this by using a key generated on build+upload (provider is implied because we only consider uploads for our current provider). Update the tests relevant to this code to exercise this condition. Change-Id: Ic68932b735d7439ca39e2fbfbe1f73c7942152d6
This commit is contained in:
parent
1ac3f8bda4
commit
fc95724601
|
@ -871,9 +871,9 @@ class StateMachineProvider(Provider, QuotaSupport):
|
|||
known_uploads = set()
|
||||
uploads = self._zk.getProviderUploads(self.provider.name)
|
||||
for image in uploads.values():
|
||||
for build in image.values():
|
||||
for build_id, build in image.items():
|
||||
for upload in build:
|
||||
known_uploads.add(upload.id)
|
||||
known_uploads.add(f'{build_id}-{upload.id}')
|
||||
|
||||
newly_leaked_nodes = {}
|
||||
newly_leaked_uploads = {}
|
||||
|
@ -882,7 +882,13 @@ class StateMachineProvider(Provider, QuotaSupport):
|
|||
if pn != self.provider.name:
|
||||
continue
|
||||
node_id = resource.metadata.get('nodepool_node_id')
|
||||
build_id = resource.metadata.get('nodepool_build_id')
|
||||
upload_id = resource.metadata.get('nodepool_upload_id')
|
||||
# Make a truly unique upload id
|
||||
if upload_id:
|
||||
build_upload_id = f'{build_id}-{upload_id}'
|
||||
else:
|
||||
build_upload_id = None
|
||||
if node_id and node_id not in known_nodes:
|
||||
newly_leaked_nodes[node_id] = resource
|
||||
if node_id in self.possibly_leaked_nodes:
|
||||
|
@ -898,9 +904,9 @@ class StateMachineProvider(Provider, QuotaSupport):
|
|||
except Exception:
|
||||
self.log.exception("Unable to delete leaked "
|
||||
f"resource for node {node_id}")
|
||||
if upload_id and upload_id not in known_uploads:
|
||||
newly_leaked_uploads[upload_id] = resource
|
||||
if upload_id in self.possibly_leaked_uploads:
|
||||
if build_upload_id and build_upload_id not in known_uploads:
|
||||
newly_leaked_uploads[build_upload_id] = resource
|
||||
if build_upload_id in self.possibly_leaked_uploads:
|
||||
# We've seen this twice now, so it's not a race
|
||||
# condition.
|
||||
try:
|
||||
|
@ -911,8 +917,9 @@ class StateMachineProvider(Provider, QuotaSupport):
|
|||
resource.plural_metric_name))
|
||||
self._statsd.incr(key, 1)
|
||||
except Exception:
|
||||
self.log.exception("Unable to delete leaked "
|
||||
f"resource for upload {upload_id}")
|
||||
self.log.exception(
|
||||
"Unable to delete leaked "
|
||||
f"resource for upload {build_upload_id}")
|
||||
self.possibly_leaked_nodes = newly_leaked_nodes
|
||||
self.possibly_leaked_uploads = newly_leaked_uploads
|
||||
|
||||
|
|
|
@ -1031,10 +1031,22 @@ class TestDriverAws(tests.DBTestCase):
|
|||
|
||||
def test_aws_resource_cleanup_import_snapshot(self):
|
||||
# This tests the import_snapshot path
|
||||
|
||||
# Create a valid, non-leaked image to test id collisions, and
|
||||
# that it is not deleted.
|
||||
configfile = self.setup_config('aws/diskimage.yaml')
|
||||
self.useBuilder(configfile)
|
||||
current_image = self.waitForImage('ec2-us-west-2', 'fake-image')
|
||||
|
||||
# Assert the image exists
|
||||
self.ec2.Image(current_image.external_id).state
|
||||
|
||||
# Start by setting up leaked resources
|
||||
# Use low numbers to intentionally collide with the current
|
||||
# image to ensure we test the non-uniqueness of upload ids.
|
||||
image_tags = [
|
||||
{'Key': 'nodepool_build_id', 'Value': '0000000042'},
|
||||
{'Key': 'nodepool_upload_id', 'Value': '0000000042'},
|
||||
{'Key': 'nodepool_build_id', 'Value': '0000000001'},
|
||||
{'Key': 'nodepool_upload_id', 'Value': '0000000001'},
|
||||
{'Key': 'nodepool_provider_name', 'Value': 'ec2-us-west-2'}
|
||||
]
|
||||
|
||||
|
@ -1115,12 +1127,25 @@ class TestDriverAws(tests.DBTestCase):
|
|||
# Probably not found
|
||||
break
|
||||
|
||||
# Assert the non-leaked image still exists
|
||||
self.ec2.Image(current_image.external_id).state
|
||||
|
||||
def test_aws_resource_cleanup_import_image(self):
|
||||
# This tests the import_image path
|
||||
|
||||
# Create a valid, non-leaked image to test id collisions, and
|
||||
# that it is not deleted.
|
||||
configfile = self.setup_config('aws/diskimage.yaml')
|
||||
self.useBuilder(configfile)
|
||||
current_image = self.waitForImage('ec2-us-west-2', 'fake-image')
|
||||
|
||||
# Assert the image exists
|
||||
self.ec2.Image(current_image.external_id).state
|
||||
|
||||
# Start by setting up leaked resources
|
||||
image_tags = [
|
||||
{'Key': 'nodepool_build_id', 'Value': '0000000042'},
|
||||
{'Key': 'nodepool_upload_id', 'Value': '0000000042'},
|
||||
{'Key': 'nodepool_build_id', 'Value': '0000000001'},
|
||||
{'Key': 'nodepool_upload_id', 'Value': '0000000001'},
|
||||
{'Key': 'nodepool_provider_name', 'Value': 'ec2-us-west-2'}
|
||||
]
|
||||
|
||||
|
@ -1179,6 +1204,9 @@ class TestDriverAws(tests.DBTestCase):
|
|||
# Probably not found
|
||||
break
|
||||
|
||||
# Assert the non-leaked image still exists
|
||||
self.ec2.Image(current_image.external_id).state
|
||||
|
||||
def test_aws_get_import_image_task(self):
|
||||
# A unit test of the unusual error handling for missing tasks
|
||||
configfile = self.setup_config('aws/diskimage.yaml')
|
||||
|
|
Loading…
Reference in New Issue