diff --git a/nodepool/driver/statemachine.py b/nodepool/driver/statemachine.py index a3b61d04b..6b53e5aeb 100644 --- a/nodepool/driver/statemachine.py +++ b/nodepool/driver/statemachine.py @@ -871,9 +871,9 @@ class StateMachineProvider(Provider, QuotaSupport): known_uploads = set() uploads = self._zk.getProviderUploads(self.provider.name) for image in uploads.values(): - for build in image.values(): + for build_id, build in image.items(): for upload in build: - known_uploads.add(upload.id) + known_uploads.add(f'{build_id}-{upload.id}') newly_leaked_nodes = {} newly_leaked_uploads = {} @@ -882,7 +882,13 @@ class StateMachineProvider(Provider, QuotaSupport): if pn != self.provider.name: continue node_id = resource.metadata.get('nodepool_node_id') + build_id = resource.metadata.get('nodepool_build_id') upload_id = resource.metadata.get('nodepool_upload_id') + # Make a truly unique upload id + if upload_id: + build_upload_id = f'{build_id}-{upload_id}' + else: + build_upload_id = None if node_id and node_id not in known_nodes: newly_leaked_nodes[node_id] = resource if node_id in self.possibly_leaked_nodes: @@ -898,9 +904,9 @@ class StateMachineProvider(Provider, QuotaSupport): except Exception: self.log.exception("Unable to delete leaked " f"resource for node {node_id}") - if upload_id and upload_id not in known_uploads: - newly_leaked_uploads[upload_id] = resource - if upload_id in self.possibly_leaked_uploads: + if build_upload_id and build_upload_id not in known_uploads: + newly_leaked_uploads[build_upload_id] = resource + if build_upload_id in self.possibly_leaked_uploads: # We've seen this twice now, so it's not a race # condition. try: @@ -911,8 +917,9 @@ class StateMachineProvider(Provider, QuotaSupport): resource.plural_metric_name)) self._statsd.incr(key, 1) except Exception: - self.log.exception("Unable to delete leaked " - f"resource for upload {upload_id}") + self.log.exception( + "Unable to delete leaked " + f"resource for upload {build_upload_id}") self.possibly_leaked_nodes = newly_leaked_nodes self.possibly_leaked_uploads = newly_leaked_uploads diff --git a/nodepool/tests/unit/test_driver_aws.py b/nodepool/tests/unit/test_driver_aws.py index e7b0c852c..41c4d5878 100644 --- a/nodepool/tests/unit/test_driver_aws.py +++ b/nodepool/tests/unit/test_driver_aws.py @@ -1031,10 +1031,22 @@ class TestDriverAws(tests.DBTestCase): def test_aws_resource_cleanup_import_snapshot(self): # This tests the import_snapshot path + + # Create a valid, non-leaked image to test id collisions, and + # that it is not deleted. + configfile = self.setup_config('aws/diskimage.yaml') + self.useBuilder(configfile) + current_image = self.waitForImage('ec2-us-west-2', 'fake-image') + + # Assert the image exists + self.ec2.Image(current_image.external_id).state + # Start by setting up leaked resources + # Use low numbers to intentionally collide with the current + # image to ensure we test the non-uniqueness of upload ids. image_tags = [ - {'Key': 'nodepool_build_id', 'Value': '0000000042'}, - {'Key': 'nodepool_upload_id', 'Value': '0000000042'}, + {'Key': 'nodepool_build_id', 'Value': '0000000001'}, + {'Key': 'nodepool_upload_id', 'Value': '0000000001'}, {'Key': 'nodepool_provider_name', 'Value': 'ec2-us-west-2'} ] @@ -1115,12 +1127,25 @@ class TestDriverAws(tests.DBTestCase): # Probably not found break + # Assert the non-leaked image still exists + self.ec2.Image(current_image.external_id).state + def test_aws_resource_cleanup_import_image(self): # This tests the import_image path + + # Create a valid, non-leaked image to test id collisions, and + # that it is not deleted. + configfile = self.setup_config('aws/diskimage.yaml') + self.useBuilder(configfile) + current_image = self.waitForImage('ec2-us-west-2', 'fake-image') + + # Assert the image exists + self.ec2.Image(current_image.external_id).state + # Start by setting up leaked resources image_tags = [ - {'Key': 'nodepool_build_id', 'Value': '0000000042'}, - {'Key': 'nodepool_upload_id', 'Value': '0000000042'}, + {'Key': 'nodepool_build_id', 'Value': '0000000001'}, + {'Key': 'nodepool_upload_id', 'Value': '0000000001'}, {'Key': 'nodepool_provider_name', 'Value': 'ec2-us-west-2'} ] @@ -1179,6 +1204,9 @@ class TestDriverAws(tests.DBTestCase): # Probably not found break + # Assert the non-leaked image still exists + self.ec2.Image(current_image.external_id).state + def test_aws_get_import_image_task(self): # A unit test of the unusual error handling for missing tasks configfile = self.setup_config('aws/diskimage.yaml')