Merge "Invalidate az cache on bad request"

This commit is contained in:
Zuul 2018-07-23 13:25:35 +00:00 committed by Gerrit Code Review
commit b3e1890e2a
4 changed files with 104 additions and 4 deletions

View File

@ -19,7 +19,7 @@ import threading
import time
import uuid
import openstack
import openstack.exceptions
from nodepool import exceptions
from nodepool.driver.openstack.provider import OpenStackProvider
@ -100,6 +100,7 @@ class FakeOpenStackCloud(object):
Dummy(Dummy.FLAVOR, id='f2', ram=8192, name='Unreal Flavor',
vcpus=4),
]
self._azs = ['az1', 'az2']
self._server_list = []
self.max_cores, self.max_instances, self.max_ram = FakeOpenStackCloud.\
_get_quota()
@ -156,6 +157,12 @@ class FakeOpenStackCloud(object):
len(instance_list) >= self.max_instances):
over_quota = True
az = kw.get('availability_zone')
if az and az not in self._azs:
raise openstack.exceptions.BadRequestException(
message='The requested availability zone is not available',
http_status=400)
s = Dummy(instance_type,
id=uuid.uuid4().hex,
name=kw['name'],
@ -261,7 +268,7 @@ class FakeOpenStackCloud(object):
self._delete(name_or_id, self._server_list)
def list_availability_zone_names(self):
    """Return the availability zone names this fake cloud currently offers.

    The names come from ``self._azs``, the same list that server creation
    checks a requested ``availability_zone`` against, so what is advertised
    here always agrees with what the fake accepts.

    Returns:
        A new list of zone names. It is a copy, so a caller that caches or
        mutates the result cannot change the fake cloud's own state.
    """
    # Do not hard-code the names: tests swap out ``self._azs`` at runtime
    # to simulate the cloud's zones changing underneath a cached list.
    return list(self._azs)
def get_compute_limits(self):
return Dummy(

View File

@ -324,13 +324,15 @@ class OpenStackProvider(Provider):
except openstack.exceptions.BadRequestException:
# We've gotten a 400 error from nova - which means the request
# was malformed. The most likely cause of that, unless something
# became functionally and systemically broken, is stale image
# became functionally and systemically broken, is stale az, image
# or flavor cache. Log a message, invalidate the caches so that
# next time we get new caches.
self._images = {}
self.__azs = None
self.__flavors = {} # TODO(gtema): caching
self.log.info(
"Clearing flavor and image caches due to 400 error from nova")
"Clearing az, flavor and image caches due to 400 error "
"from nova")
raise
def getServer(self, server_id):

View File

@ -0,0 +1,44 @@
# Nodepool test configuration with a single fake provider and one label.
# NOTE(review): presumably this is the 'node_az_change.yaml' fixture loaded
# by test_az_change_recover -- confirm against the file name in the commit.
# The '{...}' placeholders look like template fields substituted by the test
# harness before the file is parsed -- TODO confirm.
elements-dir: .
images-dir: '{images_dir}'
build-log-dir: '{build_log_dir}'

zookeeper-servers:
  - host: {zookeeper_host}
    port: {zookeeper_port}
    chroot: {zookeeper_chroot}

labels:
  - name: fake-label
    min-ready: 0

providers:
  - name: fake-provider
    cloud: fake
    driver: fake
    region-name: fake-region
    rate: 0.0001
    diskimages:
      - name: fake-image
        meta:
          key: value
          key2: value
    pools:
      - name: main
        max-servers: 96
        labels:
          - name: fake-label
            diskimage: fake-image
            min-ram: 8192
            flavor-name: 'Fake'

diskimages:
  - name: fake-image
    elements:
      - fedora
      - vm
    release: 21
    env-vars:
      TMPDIR: /opt/dib_tmp
      DIB_IMAGE_CACHE: /opt/dib_cache
      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -350,6 +350,53 @@ class TestLauncher(tests.DBTestCase):
self.assertEqual(req.state, zk.FAILED)
self.assertNotEqual(req.declined_by, [])
def test_az_change_recover(self):
    '''
    Check that the launcher copes with the cloud's availability zones
    being replaced while it is running.

    Three requests are issued in sequence: one before the zones change
    (expected to be fulfilled), one right after (expected to fail), and
    a final one that must be fulfilled on one of the new zones.
    '''
    def request_fake_label_node():
        # Store a fresh request for a single 'fake-label' node and hand
        # back whatever waitForNodeRequest returns for it.
        pending = zk.NodeRequest()
        pending.state = zk.REQUESTED
        pending.node_types.append('fake-label')
        self.zk.storeNodeRequest(pending)
        return self.waitForNodeRequest(pending)

    new_azs = ['new-az1', 'new-az2']

    config_path = self.setup_config('node_az_change.yaml')
    self.useBuilder(config_path)
    self.waitForImage('fake-provider', 'fake-image')

    launcher = self.useNodepool(config_path, watermark_sleep=1)
    launcher.start()
    self.wait_for_config(launcher)

    # Baseline: with the original zones in place the request succeeds.
    first = request_fake_label_node()
    self.assertEqual(first.state, zk.FULFILLED)

    # Swap the zones directly on the provider's cloud client.
    fake_cloud = launcher.getProviderManager('fake-provider')._getClient()
    fake_cloud._azs = list(new_azs)

    # The next request fails because the cached azs are no longer
    # available in the cloud.
    # TODO(tobiash): Ideally we should be able to recover this request
    # already.
    second = request_fake_label_node()
    self.assertEqual(second.state, zk.FAILED)

    # A further request shows that nodepool has recovered from the stale
    # az cache and now launches in one of the new zones.
    third = request_fake_label_node()
    self.assertEqual(third.state, zk.FULFILLED)

    launched = self.zk.getNode(third.nodes[0])
    self.assertIn(launched.az, new_azs)
def test_fail_minready_request_at_capacity(self):
'''
A min-ready request to a provider that is already at capacity should