Add nodepool-id to provider section

While testing zuulv3, we want to share the infracloud-chocolate
provider between two nodepool servers.  The current issue is that if
we launch nodes from zuulv3-dev.o.o, nodepool.o.o will detect those
nodes as leaked and delete them.

A way to solve this is to add a per-provider 'nodepool-id' so that an
admin can configure two separate nodepool servers to share the same
tenant.  The main reason for doing this is so we don't have to stand
up a duplicate nodepool-builder and upload duplicate images.
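As a rough sketch (the identifiers below are hypothetical, not the
actual deployment values), each instance points at the same provider
but declares its own nodepool-id:

# nodepool.yaml on nodepool.o.o (production); values are illustrative
providers:
  - name: infracloud-chocolate
    nodepool-id: production

# nodepool.yaml on zuulv3-dev.o.o (development); values are illustrative
providers:
  - name: infracloud-chocolate
    nodepool-id: zuulv3-dev

With that in place, each instance's leak cleanup skips any instance
whose metadata carries a different nodepool-id.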

Change-Id: I03a95ce7b8bf06199de7f46fd3d0f82407bec8f5
Signed-off-by: Paul Belanger <pabelanger@redhat.com>
Paul Belanger 2017-02-27 14:16:12 -05:00
parent cb1f52634a
commit a6f4f6be9b
7 changed files with 152 additions and 1 deletion

View File

@@ -446,6 +446,14 @@ provider, the Nodepool image types are also defined (see
  In seconds. Default 3600.

``nodepool-id`` (deprecated)
  A unique string to identify which nodepool instance is using a
  provider.  This is useful if you want to configure production and
  development instances of nodepool but share the same provider.

  Default None

``keypair``
  Default None
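As an illustration only (a minimal sketch with placeholder values, not
defaults), the option sits alongside the other provider settings like
this:

providers:
  - name: example-provider    # placeholder provider name
    keypair: example-keypair  # optional; Default None
    nodepool-id: production   # optional; Default None
    max-servers: 10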

View File

@@ -73,6 +73,7 @@ class ConfigValidator:
            'boot-timeout': int,
            'api-timeout': int,
            'launch-timeout': int,
            'nodepool-id': str,
            'rate': float,
            'images': [images],
            'template-hostname': str,

View File

@@ -40,6 +40,7 @@ class Config(ConfigValue):
class Provider(ConfigValue):
    def __eq__(self, other):
        if (other.cloud_config != self.cloud_config or
            other.nodepool_id != self.nodepool_id or
            other.max_servers != self.max_servers or
            other.pool != self.pool or
            other.image_type != self.image_type or
@@ -197,6 +198,7 @@ def loadConfig(config_path):
        cloud_kwargs = _cloudKwargsFromProvider(provider)
        p.cloud_config = _get_one_cloud(cloud_config, cloud_kwargs)
        p.nodepool_id = provider.get('nodepool-id', None)
        p.region_name = provider.get('region-name')
        p.max_servers = provider['max-servers']
        p.keypair = provider.get('keypair', None)

View File

@@ -1542,6 +1542,13 @@ class NodePool(threading.Thread):
                                           provider.name,
                                           meta['provider_name']))
                        continue
                    nodepool_id = meta.get('nodepool_id', None)
                    if nodepool_id and nodepool_id != provider.nodepool_id:
                        self.log.debug("Instance %s (%s) in %s "
                                       "was not launched by us" % (
                                           server['name'], server['id'],
                                           provider.name))
                        continue
                    node_id = meta.get('node_id')
                    if node_id:
                        if session.getNode(node_id):

View File

@@ -220,6 +220,8 @@ class ProviderManager(object):
        # groups[0] is the image name or anything silly like that.
        nodepool_meta = dict(provider_name=self.provider.name)
        groups_meta = [self.provider.name]
        if self.provider.nodepool_id:
            nodepool_meta['nodepool_id'] = self.provider.nodepool_id
        if nodepool_node_id:
            nodepool_meta['node_id'] = nodepool_node_id
        if nodepool_snapshot_image_id:
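For reference, the metadata assembled here is what the leak-cleanup
code above later reads back with meta.get(); for a single instance it
amounts to roughly the following key/value pairs (values illustrative):

# Sketch of nodepool_meta attached to one launched instance
provider_name: fake-provider
nodepool_id: foo
node_id: '1'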

View File

@@ -0,0 +1,62 @@
elements-dir: .
images-dir: '{images_dir}'

cron:
  check: '*/15 * * * *'
  cleanup: '* * * * * *'

zmq-publishers:
  - tcp://localhost:8881

gearman-servers:
  - host: localhost
    port: {gearman_port}

zookeeper-servers:
  - host: {zookeeper_host}
    port: {zookeeper_port}
    chroot: {zookeeper_chroot}

labels:
  - name: fake-label
    image: fake-image
    min-ready: 1
    providers:
      - name: fake-provider

providers:
  - name: fake-provider
    region-name: fake-region
    keypair: 'if-present-use-this-keypair'
    username: 'fake'
    password: 'fake'
    auth-url: 'fake'
    project-id: 'fake'
    max-servers: 96
    pool: 'fake'
    networks:
      - net-id: 'some-uuid'
    rate: 0.0001
    nodepool-id: foo
    images:
      - name: fake-image
        min-ram: 8192
        name-filter: 'Fake'
        meta:
          key: value
          key2: value

targets:
  - name: fake-target

diskimages:
  - name: fake-image
    elements:
      - fedora
      - vm
    release: 21
    env-vars:
      TMPDIR: /opt/dib_tmp
      DIB_IMAGE_CACHE: /opt/dib_cache
      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@@ -334,9 +334,15 @@ class TestNodepool(tests.DBTestCase):
            self.assertEqual(len(deleted_nodes), 1)
            self.assertEqual(node_id, deleted_nodes[0].id)

    def test_leaked_node_with_nodepool_id(self):
        self._test_leaked_node('leaked_node_nodepool_id.yaml')

    def test_leaked_node(self):
        self._test_leaked_node('leaked_node.yaml')

    def _test_leaked_node(self, cfgfile):
        """Test that a leaked node is deleted"""
        configfile = self.setup_config('leaked_node.yaml')
        configfile = self.setup_config(cfgfile)
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
@@ -385,6 +391,69 @@
                                     state=nodedb.READY)
            self.assertEqual(len(nodes), 1)

    def test_leaked_node_not_deleted(self):
        """Test that a leaked node is not deleted"""
        configfile = self.setup_config('leaked_node_nodepool_id.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self._useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        self.log.debug("Waiting for initial pool...")
        self.waitForNodes(pool)
        self.log.debug("...done waiting for initial pool.")
        pool.stop()

        # Make sure we have a node built and ready
        provider = pool.config.providers['fake-provider']
        manager = pool.getProviderManager(provider)
        servers = manager.listServers()
        self.assertEqual(len(servers), 1)

        with pool.getDB().getSession() as session:
            nodes = session.getNodes(provider_name='fake-provider',
                                     label_name='fake-label',
                                     target_name='fake-target',
                                     state=nodedb.READY)
            self.assertEqual(len(nodes), 1)

            # Delete the node from the db, but leave the instance
            # so it is leaked.
            self.log.debug("Delete node db record so instance is leaked...")
            for node in nodes:
                node.delete()
            self.log.debug("...deleted node db so instance is leaked.")

            nodes = session.getNodes(provider_name='fake-provider',
                                     label_name='fake-label',
                                     target_name='fake-target',
                                     state=nodedb.READY)
            self.assertEqual(len(nodes), 0)

        # Wait for nodepool to replace it, which should be enough
        # time for it to also delete the leaked node
        configfile = self.setup_config('leaked_node.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()
        self.log.debug("Waiting for replacement pool...")
        self.waitForNodes(pool)
        self.log.debug("...done waiting for replacement pool.")

        # Make sure we end up with only one server (the replacement)
        provider = pool.config.providers['fake-provider']
        manager = pool.getProviderManager(provider)
        foobar_servers = manager.listServers()
        self.assertEqual(len(servers), 1)
        self.assertEqual(len(foobar_servers), 1)

        with pool.getDB().getSession() as session:
            nodes = session.getNodes(provider_name='fake-provider',
                                     label_name='fake-label',
                                     target_name='fake-target',
                                     state=nodedb.READY)
            self.assertEqual(len(nodes), 1)

        # Just to be safe, ensure we have 2 nodes again.
        self.assertEqual(len(servers), 1)
        self.assertEqual(len(foobar_servers), 1)

    @skip("Disabled for early v3 development")
    def test_building_image_cleanup_on_start(self):
        """Test that a building image is deleted on start"""