Handle add_host and delete_host cases more robustly

Presently novajoin has no way of differentiating between hosts and
hostnames. As a result, it is possible for a host to be inadvertantly
deleted in certain conditions.

This fix aims to resolve this and other join/delete edge cases  by
passing the instance-id (server uuid) from nova along in the
description field that is passed to IdM. We can use this
description and id to ensure we delete only the hosts we meant to.

Overview of changes:
- Persist nova instance-id in IdM's Description field
- Update join logic to handle hosts with old Description field
- Update join logic to cause nova deploy failure when attempting to
  add a host with a hostname that is already enrolled
- Add new DuplicateInstanceError exception type
- Add new DeleteInstanceIdMismatch exception type
- Add inline comments documenting code flow
- IPAClient add_host doc strings for clarity

Change-Id: I676bac162a6ec35366c506bdb660cf3913131afd
This commit is contained in:
Harry Rybacki 2019-08-20 11:44:05 -04:00
parent 6be279e35b
commit 4f1353bb1f
5 changed files with 146 additions and 24 deletions

View File

@ -157,3 +157,18 @@ class NotificationVersionMismatch(JoinException):
class IPAConnectionError(JoinException):
message = "Unable to connect to IPA after %(tries)s tries"
class DuplicateInstanceError(JoinException):
message = ("%(hostname)s already exists in IPA. It was associated with "
"nova instance %(stored_instance_id)s. The new host_add "
"originates from nova instance %(instance_id)s. Please "
"review and manually remove host entries before attempting "
"to redeploy.")
class DeleteInstanceIdMismatch(JoinException):
message = ("%(hostname)s not removed from IPA. The requested "
"instance id: %(requested_instance)s -- does not match "
"the instance id stored in IPA for that host: "
"%(stored_instance)s.")

View File

@ -13,6 +13,7 @@
# under the License.
import cachetools
import json
import os
import time
import uuid
@ -264,15 +265,24 @@ class IPAClient(IPANovaJoinBase):
service_cache = cachetools.TTLCache(maxsize=512, ttl=30)
def add_host(self, hostname, ipaotp, metadata=None, image_metadata=None,
message_id=0):
instance_id=None, message_id=0):
"""Add a host to IPA.
If requested in the metadata, add a host to IPA. The assumption
is that hostname is already fully-qualified.
Because this is triggered by a metadata request, which can happen
multiple times, first we try to update the OTP in the host entry
and if that fails due to NotFound the host is added.
Returns:
Unicode Str: When a new host is added successfully, its OTP is returned
This is later used to enroll the host via cloud-init
False: Returned when host with same hostname and nova instance-id is
already enrolled in IPA.
DuplicateInstanceError: This is raised when an attempt is made to add
a hostname that already exists and is enrolled in IPA.
"""
LOG.debug("[%s] Adding %s to IPA.", message_id, hostname)
@ -302,8 +312,11 @@ class IPAClient(IPANovaJoinBase):
osdistro = image_metadata.get('os_distro', '')
osver = image_metadata.get('os_version', '')
# 'description': 'IPA host for %s' % inst.display_description,
novajoin_metadata = {"novajoin_metadata":
{"nova_instance_id": instance_id}}
hostargs = {
'description': u'IPA host for OpenStack',
'description': six.text_type(json.dumps(novajoin_metadata)),
'userpassword': six.text_type(ipaotp),
'force': True # we don't have an ip addr yet so
# use force to add anyway
@ -319,11 +332,11 @@ class IPAClient(IPANovaJoinBase):
modargs = {
'userpassword': six.text_type(ipaotp),
}
try:
self._call_ipa(message_id, 'host_mod', *params, **modargs)
self.host_cache[hostname] = six.text_type(ipaotp)
except errors.NotFound:
# No host with this hostname is presently enrolled in IPA
try:
self._call_ipa(message_id, 'host_add', *params, **hostargs)
self.host_cache[hostname] = six.text_type(ipaotp)
@ -335,13 +348,60 @@ class IPAClient(IPANovaJoinBase):
# and the OTP was set.
self.host_cache[hostname] = six.text_type(ipaotp)
except errors.ValidationError:
# A host with the same hostname is already enrolled in IPA.
# Updating the OTP on an enrolled-host is not allowed
# in IPA and really a no-op.
# We don't know the OTP of the host, so we cannot update the cache.
LOG.info('OTP is unknown for host %s. This is because validation '
'failed during host_mod operation, which means the host '
'with the same name was already enrolled.', hostname)
return False
# Note(hrybacki): Fetch host details from IPA and try to compare
# the requested instance ID with what is stored
# in IPA
target_host = self._call_ipa(message_id, 'host_show', hostname)
try:
novajoin_metadata = json.loads(target_host['result']
['description'][0])
stored_host_instance_id = \
novajoin_metadata['novajoin_metadata']['nova_instance_id']
if stored_host_instance_id != instance_id:
LOG.error('[%s] %s already exists and is enrolled ' +
'with IPA. It was created for nova ' +
'instance %s. Requested host add originates ' +
'from nova instance %s. Please review and ' +
'manually remove host entries in IPA or choose' +
'another hostname before deploying the nova ' +
'instance again.',
message_id, hostname,
stored_host_instance_id, instance_id)
# NOTE(hrybacki): When we know the new host's instance-id
# does not match the enrolled host's, tell
# Nova to put the new compute node in an
# ERROR state
raise exception.DuplicateInstanceError(
hostname=hostname,
stored_instance_id=stored_host_instance_id,
instance_id=instance_id)
except ValueError:
# Note(hrybacki): Older host entries will have a unicode str
# rather than a serialized json returned in
# their Description field
stored_host_instance_id = 'unknown'
LOG.error('[%s] %s already exists and is enrolled with ' +
'IPA. This is an older host and novajoin is ' +
'unable to determine if the new host (%s) is ' +
'the same as the host in IPA. Please review ' +
'and manually remove host entries in IPA before ' +
'deploying the nova instance.',
message_id, hostname, instance_id)
raise exception.DuplicateInstanceError(
hostname=hostname,
stored_instance_id=stored_host_instance_id,
instance_id=instance_id)
else:
# Note(hrybacki): The host is already enrolled, do nothing.
LOG.info('OTP is unknown for host %s. This is because '
'validation failed during host_mod operation, '
'which means the host with the same name was '
'already enrolled.', hostname)
return False
return self.host_cache.get(hostname, False)
@ -397,7 +457,8 @@ class IPAClient(IPANovaJoinBase):
# Ignore DNS deletion errors
pass
def delete_host(self, hostname, metadata=None, message_id=0):
def delete_host(self, hostname, requested_instance_id, metadata=None,
message_id=0):
"""Delete a host from IPA and remove all related DNS entries."""
LOG.debug("[%s] Deleting %s from IPA", message_id, hostname)
@ -408,8 +469,38 @@ class IPAClient(IPANovaJoinBase):
if metadata is None:
metadata = {}
# TODO(rcrit): lookup instance in nova to get metadata to see if
# the host was enrolled. For now assume yes.
target_host_instance_id = None
try:
# Note(hrybacki): Fetch host details from IPA and ensure we do not
# delete a host entry if its instance-id does not
# match the request
target_host = self._call_ipa(message_id, 'host_show', hostname)
novajoin_metadata = json.loads(target_host['result']
['description'][0])
target_host_instance_id = \
novajoin_metadata['novajoin_metadata']['nova_instance_id']
if target_host_instance_id != requested_instance_id:
LOG.info('Leaving %s in place. Deletion request instance-id ' +
'(%s) does not match instance-id mapped to host in ' +
'IPA (%s). This could be the result of an old, ' +
'cached delete request.',
hostname, requested_instance_id,
target_host_instance_id)
raise exception.DeleteInstanceIdMismatch(
hostname=hostname,
requested_instance=requested_instance_id,
stored_instance=target_host_instance_id)
except errors.NotFound:
# NOTE(hrybacki): No host found. Continue and clean up the DNS
# records.
pass
except ValueError:
# NOTE(hrybacki): If we catch this, we are likely dealing with a
# host that was created using an older version of
# novajoin and does not contain the json object
# with our metadata. Continue deletion of host,
# subhosts, and dns records.
pass
params = [hostname]
kw = {
@ -420,8 +511,8 @@ class IPAClient(IPANovaJoinBase):
del self.host_cache[hostname]
self._call_ipa(message_id, 'host_del', *params, **kw)
except (errors.NotFound, errors.ACIError):
# Trying to delete a host that doesn't exist will raise an ACIError
# to hide whether the entry exists or not
# Trying to delete a host that doesn't exist will raise an
# ACIError to hide whether the entry exists or not
pass
(hn, domain) = self.split_hostname(hostname)
@ -431,7 +522,8 @@ class IPAClient(IPANovaJoinBase):
dns_kw = {'del_all': True, }
try:
self._call_ipa(message_id, 'dnsrecord_del', *dns_params, **dns_kw)
self._call_ipa(message_id, 'dnsrecord_del', *dns_params,
**dns_kw)
except (errors.NotFound, errors.ACIError):
# Ignore DNS deletion errors
pass

View File

@ -190,6 +190,7 @@ class JoinController(Controller):
data = {}
ipaotp = uuid.uuid4().hex
instance_id = body.get('instance-id', '')
data['hostname'] = util.get_fqdn(hostname_short, project_name)
_, realm = self.ipaclient.get_host_and_realm()
@ -198,10 +199,15 @@ class JoinController(Controller):
try:
data['ipaotp'] = self.ipaclient.add_host(data['hostname'], ipaotp,
metadata, image_metadata,
message_id)
instance_id, message_id)
if not data['ipaotp']:
# OTP was not added to host, don't return one
del data['ipaotp']
except exception.DuplicateInstanceError as ee:
# NOTE(hrybacki): This likely means an attempt to add a host with
# a hostname that has already enrolled. Look for
# a Forbidden (HTTP 403) in nova-compute.logs
raise base.Fault(webob.exc.HTTPForbidden(explanation=ee.msg))
except Exception as e: # pylint: disable=broad-except
LOG.error('adding host failed %s', e)
LOG.error(traceback.format_exc())

View File

@ -185,7 +185,7 @@ class NotificationEndpoint(object):
@event_handlers('compute.instance.delete.end')
def compute_instance_delete(self, payload, message_id):
hostname_short = payload['hostname']
instance_id = payload['instance_id']
requested_instance_id = payload['instance_id']
payload_metadata = payload['metadata']
image_metadata = payload['image_meta']
@ -200,15 +200,19 @@ class NotificationEndpoint(object):
message_id, hostname)
return
LOG.info("[%s] Delete host %s (%s)", message_id, instance_id, hostname)
LOG.info("[%s] Delete host %s (%s)", message_id,
requested_instance_id, hostname)
try:
ipa = ipaclient()
ipa.delete_host(hostname, {}, message_id)
self.delete_subhosts(ipa, hostname_short, payload_metadata)
ipa.delete_host(hostname, requested_instance_id, {}, message_id)
self.delete_subhosts(ipa, hostname_short, payload_metadata,
message_id)
except exception.IPAConnectionError:
LOG.error("[%s] IPA Connection Error when deleting host %s (%s). "
"Manual cleanup may be required in the IPA server.",
message_id, instance_id, hostname)
message_id, requested_instance_id, hostname)
except exception.DeleteInstanceIdMismatch:
return
@event_handlers('network.floating_ip.associate')
def floaitng_ip_associate(self, payload, message_id):

View File

@ -97,26 +97,31 @@ class TestIPAService(testtools.TestCase):
def test_host_del(self):
global hostname
self.ipaclient.delete_host(hostname)
requested_instance_id = six.text_type(str(uuid.uuid4()))
self.ipaclient.delete_host(hostname, requested_instance_id)
def test_host_expired_ticket(self):
global hostname
# The local krb5.conf is setup to issue tickets for 1 minute
time.sleep(60)
requested_instance_id = six.text_type(str(uuid.uuid4()))
self.ipaclient.delete_host(hostname)
self.ipaclient.delete_host(hostname, requested_instance_id)
def test_host_service(self):
global hostname
ipaotp = str(uuid.uuid4())
requested_instance_id = six.text_type(str(uuid.uuid4()))
metadata = {}
image_metadata = {}
subhost = six.text_type(str(uuid.uuid4()) + '.' + api.env.domain)
service_principal = u'test/%s' % subhost
self.ipaclient.add_host(hostname, ipaotp, metadata, image_metadata)
self.ipaclient.add_host(hostname, ipaotp, metadata, image_metadata,
requested_instance_id)
# FIXME(hrybacki): Subhosts should probably be added by add_subhost
self.ipaclient.add_host(subhost, ipaotp, metadata, image_metadata)
self.ipaclient.add_service(service_principal)
self.ipaclient.service_add_host(service_principal, hostname)
self.ipaclient.delete_subhost(subhost)
self.ipaclient.delete_host(hostname)
self.ipaclient.delete_host(hostname, requested_instance_id)
self.ipaclient.flush_batch_operation()