Merge "Invalidate az cache on bad request"
This commit is contained in:
commit
b3e1890e2a
|
@ -19,7 +19,7 @@ import threading
|
|||
import time
|
||||
import uuid
|
||||
|
||||
import openstack
|
||||
import openstack.exceptions
|
||||
|
||||
from nodepool import exceptions
|
||||
from nodepool.driver.openstack.provider import OpenStackProvider
|
||||
|
@ -100,6 +100,7 @@ class FakeOpenStackCloud(object):
|
|||
Dummy(Dummy.FLAVOR, id='f2', ram=8192, name='Unreal Flavor',
|
||||
vcpus=4),
|
||||
]
|
||||
self._azs = ['az1', 'az2']
|
||||
self._server_list = []
|
||||
self.max_cores, self.max_instances, self.max_ram = FakeOpenStackCloud.\
|
||||
_get_quota()
|
||||
|
@ -156,6 +157,12 @@ class FakeOpenStackCloud(object):
|
|||
len(instance_list) >= self.max_instances):
|
||||
over_quota = True
|
||||
|
||||
az = kw.get('availability_zone')
|
||||
if az and az not in self._azs:
|
||||
raise openstack.exceptions.BadRequestException(
|
||||
message='The requested availability zone is not available',
|
||||
http_status=400)
|
||||
|
||||
s = Dummy(instance_type,
|
||||
id=uuid.uuid4().hex,
|
||||
name=kw['name'],
|
||||
|
@ -261,7 +268,7 @@ class FakeOpenStackCloud(object):
|
|||
self._delete(name_or_id, self._server_list)
|
||||
|
||||
def list_availability_zone_names(self):
|
||||
return ['fake-az1', 'fake-az2']
|
||||
return self._azs.copy()
|
||||
|
||||
def get_compute_limits(self):
|
||||
return Dummy(
|
||||
|
|
|
@ -324,13 +324,15 @@ class OpenStackProvider(Provider):
|
|||
except openstack.exceptions.BadRequestException:
|
||||
# We've gotten a 400 error from nova - which means the request
|
||||
# was malformed. The most likely cause of that, unless something
|
||||
# became functionally and systemically broken, is stale image
|
||||
# became functionally and systemically broken, is stale az, image
|
||||
# or flavor cache. Log a message, invalidate the caches so that
|
||||
# next time we get new caches.
|
||||
self._images = {}
|
||||
self.__azs = None
|
||||
self.__flavors = {} # TODO(gtema): caching
|
||||
self.log.info(
|
||||
"Clearing flavor and image caches due to 400 error from nova")
|
||||
"Clearing az, flavor and image caches due to 400 error "
|
||||
"from nova")
|
||||
raise
|
||||
|
||||
def getServer(self, server_id):
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
elements-dir: .
|
||||
images-dir: '{images_dir}'
|
||||
build-log-dir: '{build_log_dir}'
|
||||
|
||||
zookeeper-servers:
|
||||
- host: {zookeeper_host}
|
||||
port: {zookeeper_port}
|
||||
chroot: {zookeeper_chroot}
|
||||
|
||||
labels:
|
||||
- name: fake-label
|
||||
min-ready: 0
|
||||
|
||||
providers:
|
||||
- name: fake-provider
|
||||
cloud: fake
|
||||
driver: fake
|
||||
region-name: fake-region
|
||||
rate: 0.0001
|
||||
diskimages:
|
||||
- name: fake-image
|
||||
meta:
|
||||
key: value
|
||||
key2: value
|
||||
pools:
|
||||
- name: main
|
||||
max-servers: 96
|
||||
labels:
|
||||
- name: fake-label
|
||||
diskimage: fake-image
|
||||
min-ram: 8192
|
||||
flavor-name: 'Fake'
|
||||
|
||||
diskimages:
|
||||
- name: fake-image
|
||||
elements:
|
||||
- fedora
|
||||
- vm
|
||||
release: 21
|
||||
env-vars:
|
||||
TMPDIR: /opt/dib_tmp
|
||||
DIB_IMAGE_CACHE: /opt/dib_cache
|
||||
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
|
||||
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
|
|
@ -350,6 +350,53 @@ class TestLauncher(tests.DBTestCase):
|
|||
self.assertEqual(req.state, zk.FAILED)
|
||||
self.assertNotEqual(req.declined_by, [])
|
||||
|
||||
def test_az_change_recover(self):
|
||||
'''
|
||||
Test that nodepool recovers from az change in the cloud.
|
||||
'''
|
||||
configfile = self.setup_config('node_az_change.yaml')
|
||||
self.useBuilder(configfile)
|
||||
self.waitForImage('fake-provider', 'fake-image')
|
||||
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
pool.start()
|
||||
self.wait_for_config(pool)
|
||||
|
||||
req = zk.NodeRequest()
|
||||
req.state = zk.REQUESTED
|
||||
req.node_types.append('fake-label')
|
||||
self.zk.storeNodeRequest(req)
|
||||
|
||||
req = self.waitForNodeRequest(req)
|
||||
self.assertEqual(req.state, zk.FULFILLED)
|
||||
|
||||
# now change the azs in the cloud
|
||||
cloud = pool.getProviderManager('fake-provider')._getClient()
|
||||
cloud._azs = ['new-az1', 'new-az2']
|
||||
|
||||
# Do a second request. This will fail because the cached azs are not
|
||||
# available anymore.
|
||||
# TODO(tobiash): Ideally we should already be able to recover
|
||||
# this request.
|
||||
req2 = zk.NodeRequest()
|
||||
req2.state = zk.REQUESTED
|
||||
req2.node_types.append('fake-label')
|
||||
self.zk.storeNodeRequest(req2)
|
||||
req2 = self.waitForNodeRequest(req2)
|
||||
self.assertEqual(req2.state, zk.FAILED)
|
||||
|
||||
# Create a third request to test that nodepool successfully recovers
|
||||
# from a stale az cache.
|
||||
req3 = zk.NodeRequest()
|
||||
req3.state = zk.REQUESTED
|
||||
req3.node_types.append('fake-label')
|
||||
self.zk.storeNodeRequest(req3)
|
||||
req3 = self.waitForNodeRequest(req3)
|
||||
self.assertEqual(req3.state, zk.FULFILLED)
|
||||
|
||||
node = self.zk.getNode(req3.nodes[0])
|
||||
self.assertIn(node.az, ['new-az1', 'new-az2'])
|
||||
|
||||
def test_fail_minready_request_at_capacity(self):
|
||||
'''
|
||||
A min-ready request to a provider that is already at capacity should
|
||||
|
|
Loading…
Reference in New Issue