# keystoneauth/keystoneauth1/discover.py

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""The passive components to version discovery.

The Discover object in discover.py contains functions that can create objects
on your behalf. These functions are not usable from within the keystoneauth1
library because you will get dependency resolution issues.

The Discover object in this file provides the querying components of Discovery.
This includes functions like url_for which allow you to retrieve URLs and the
raw data specified in version discovery responses.
"""

import copy
import re

from positional import positional
import six
from six.moves import urllib

from keystoneauth1 import _utils as utils
from keystoneauth1 import exceptions


# Module-level logger, obtained through the shared utils helper and keyed on
# this module's name. Used below to report skipped/invalid discovery data.
_LOGGER = utils.get_logger(__name__)


@positional()
def get_version_data(session, url, authenticated=None):
    """Retrieve raw version data from a url.

    Issues a GET for ``url`` asking for JSON and extracts the list of version
    entries from whichever of the known document layouts the server used.

    :param session: The object used to perform the request; its ``get``
                    method is called with ``url``, the headers and
                    ``authenticated``.
    :param str url: The url to fetch version data from.
    :param authenticated: Passed through unchanged to ``session.get``.

    :raises keystoneauth1.exceptions.DiscoveryFailure:
        if the response is not JSON or contains no recognisable version data.

    :returns: The version entries found in the response.
    :rtype: list
    """
    headers = {'Accept': 'application/json'}

    resp = session.get(url, headers=headers, authenticated=authenticated)

    try:
        body_resp = resp.json()
    except ValueError:
        pass
    else:
        # NOTE: every lookup below also guards against TypeError. A valid
        # JSON body is not necessarily an object (it may be a list, a string,
        # a number or null) and subscripting those with a string key raises
        # TypeError, which must end in DiscoveryFailure, not a raw traceback.

        # In the event of querying a root URL we will get back a list of
        # available versions.
        try:
            return body_resp['versions']['values']
        except (KeyError, TypeError):
            pass

        # Most servers don't have a 'values' element so accept a simple
        # versions dict if available.
        try:
            return body_resp['versions']
        except (KeyError, TypeError):
            pass

        # Otherwise if we query an endpoint like /v2.0 then we will get back
        # just the one available version.
        try:
            return [body_resp['version']]
        except (KeyError, TypeError):
            pass

    # Keep the error short: only the first 50 characters of the body are
    # echoed back.
    err_text = resp.text[:50] + '...' if len(resp.text) > 50 else resp.text
    raise exceptions.DiscoveryFailure('Invalid Response - Bad version data '
                                      'returned: %s' % err_text)


def normalize_version_number(version):
    """Turn a version representation into a tuple.

    Accepted inputs are strings (optionally prefixed with ``v``, e.g.
    ``'v2.0'``, ``'3'``, ``'3.20'``), ints, floats, and iterables whose items
    can each be converted with ``int``.

    :param version: The version representation to normalize.

    :raises TypeError: if ``version`` cannot be turned into a tuple of ints.

    :returns: The version as a tuple of ints.
    :rtype: tuple
    """
    # Work on a separate name so that the error raised at the bottom can
    # report the value the caller actually passed in, not a half-converted one.
    ver = version

    # if it's an integer or a numeric as a string then normalize it
    # to a string, this ensures 1 decimal point
    # If it's a float as a string, don't do that, the split/map below
    # will do what we want. (Otherwise, we wind up with 3.20 -> (3, 2)
    if isinstance(ver, six.string_types):
        # trim the v from a 'v2.0' or similar
        ver = ver.lstrip('v')
        try:
            # If version is a pure int, like '1' or '200' this will produce
            # a stringified version with a .0 added. If it's any other number,
            # such as '1.1' - int(version) raises an Exception
            ver = str(float(int(ver)))
        except ValueError:
            pass

    # If it's an int, turn it into a float
    elif isinstance(ver, int):
        ver = str(float(ver))

    elif isinstance(ver, float):
        ver = str(ver)

    # At this point, we should either have a string that contains a number
    # or something decidedly else.

    # if it's a string from above break it on .
    if hasattr(ver, 'split'):
        ver = ver.split('.')

    # It's either an iterable, or something else that makes us sad.
    try:
        return tuple(map(int, ver))
    except (TypeError, ValueError):
        pass

    # Wrap the value in a 1-tuple: formatting with a bare tuple argument would
    # treat its items as separate format arguments and fail with an unrelated
    # "not all arguments converted" message.
    raise TypeError('Invalid version specified: %s' % (version,))


def version_to_string(version):
    """Render a version tuple as a dotted string, e.g. (3, 1) -> '3.1'."""
    return ".".join(map(str, version))


def version_match(required, candidate):
    """Decide whether a candidate version can stand in for a required one.

    A candidate qualifies only when it shares the major version (first
    element) with the requirement and does not compare lower than it.

    For instance 3.3 satisfies a requirement of 3.1, whereas 4.1 and 3.0 do
    not.

    :param tuple required: the version that must be met.
    :param tuple candidate: the version to test against required.

    :returns: True if candidate is suitable False otherwise.
    :rtype: bool
    """
    # A different major version is never acceptable, even a newer one: if v2
    # was asked for, v3 is not a substitute (and vice versa).
    if candidate[0] == required[0]:
        # Same major: reject anything older than what was requested.
        return not candidate < required

    return False


def _combine_relative_url(discovery_url, version_url):
    """Resolve ``version_url`` against ``discovery_url``.

    ``version_url`` may be absolute, relative or protocol-less; absolute urls
    come back unaffected by ``discovery_url``.
    """
    # Force exactly one trailing '/' on the base so that urljoin treats its
    # whole path as a directory. Without it a relative "v2" joined onto
    # http://host/admin would give http://host/v2 instead of the wanted
    # http://host/admin/v2.
    base = discovery_url.rstrip('/') + '/'
    joined = urllib.parse.urljoin(base, version_url)

    # Round-trip through the parser and re-serialise the result (the original
    # author's stated intent here was to squish doubled slashes).
    parsed = urllib.parse.urlparse(joined)
    return parsed.geturl()


class Discover(object):
    """Query interface over the version data published at a url.

    The version data is fetched once, at construction time, through
    ``get_version_data``; every method afterwards only filters or reshapes
    that stored data.
    """

    # Status strings (compared lower-cased) and the stability bucket each one
    # falls into. Anything not listed here is treated as "unknown".
    CURRENT_STATUSES = ('stable', 'current', 'supported')
    DEPRECATED_STATUSES = ('deprecated',)
    EXPERIMENTAL_STATUSES = ('experimental',)

    @positional()
    def __init__(self, session, url, authenticated=None):
        """Fetch and store the version data for ``url``.

        :param session: Passed to ``get_version_data`` to make the request.
        :param str url: The url to run discovery against. Relative links in
                        the returned data are later resolved against it.
        :param authenticated: Passed through to ``get_version_data``.
        """
        self._url = url
        self._data = get_version_data(session, url,
                                      authenticated=authenticated)

    def raw_version_data(self, allow_experimental=False,
                         allow_deprecated=True, allow_unknown=False):
        """Get raw version information from URL.

        Raw data indicates that only minimal validation processing is performed
        on the data, so what is returned here will be the data in the same
        format it was received from the endpoint.

        :param bool allow_experimental: Allow experimental version endpoints.
        :param bool allow_deprecated: Allow deprecated version endpoints.
        :param bool allow_unknown: Allow endpoints with an unrecognised status.

        :returns: The endpoints returned from the server that match the
                  criteria.
        :rtype: list
        """
        versions = []
        for v in self._data:
            try:
                status = v['status']
            except KeyError:
                _LOGGER.warning('Skipping over invalid version data. '
                                'No stability status in version.')
                continue

            # Statuses are matched case-insensitively.
            status = status.lower()

            if status in self.CURRENT_STATUSES:
                versions.append(v)
            elif status in self.DEPRECATED_STATUSES:
                if allow_deprecated:
                    versions.append(v)
            elif status in self.EXPERIMENTAL_STATUSES:
                if allow_experimental:
                    versions.append(v)
            elif allow_unknown:
                versions.append(v)

        return versions

    @positional()
    def version_data(self, reverse=False, **kwargs):
        """Get normalized version data.

        Return version data in a structured way. Entries that lack an id,
        links, or a usable ``self`` link are skipped.

        :param bool reverse: Reverse the list. reverse=true will mean the
                             returned list is sorted from newest to oldest
                             version.
        :param kwargs: Filters forwarded to :meth:`raw_version_data`.
        :returns: A list of version data dictionaries sorted by version number.
                  Each data element in the returned list is a dictionary
                  consisting of:

          :version tuple: The normalized version of the endpoint.
          :url str: The url for the endpoint.
          :collection str: The url of the ``collection`` link, or None.
          :min_microversion tuple: The normalized minimum microversion, or
                                   None.
          :max_microversion tuple: The normalized maximum microversion, or
                                   None.
          :raw_status str: The status as provided by the server
        :rtype: list(dict)
        """
        data = self.raw_version_data(**kwargs)
        versions = []

        for v in data:
            try:
                version_str = v['id']
            except KeyError:
                _LOGGER.info('Skipping invalid version data. Missing ID.')
                continue

            try:
                links = v['links']
            except KeyError:
                _LOGGER.info('Skipping invalid version data. Missing links')
                continue

            version_number = normalize_version_number(version_str)

            # collect microversion information
            min_microversion = v.get('min_version') or None
            if min_microversion:
                min_microversion = normalize_version_number(min_microversion)

            # 'version' is consulted whenever 'max_version' is missing *or*
            # empty. Using it only as the dict.get() default would throw away
            # a valid 'version' when the document carries an empty
            # 'max_version' key alongside it.
            max_microversion = (v.get('max_version') or
                                v.get('version') or None)
            if max_microversion:
                max_microversion = normalize_version_number(max_microversion)

            self_url = None
            collection_url = None
            for link in links:
                try:
                    rel = link['rel']
                    url = _combine_relative_url(self._url, link['href'])
                except (KeyError, TypeError):
                    _LOGGER.info('Skipping invalid version link. '
                                 'Missing link URL or relationship.')
                    continue

                if rel.lower() == 'self':
                    self_url = url
                elif rel.lower() == 'collection':
                    collection_url = url
            if not self_url:
                _LOGGER.info('Skipping invalid version data. '
                             'Missing link to endpoint.')
                continue

            versions.append({'version': version_number,
                             'url': self_url,
                             'collection': collection_url,
                             'min_microversion': min_microversion,
                             'max_microversion': max_microversion,
                             'raw_status': v['status']})

        versions.sort(key=lambda v: v['version'], reverse=reverse)
        return versions

    def data_for(self, version, **kwargs):
        """Return endpoint data for a version.

        :param tuple version: The version is always a minimum version in the
            same major release as there should be no compatibility issues with
            using a version newer than the one asked for.
        :param kwargs: Filters forwarded to :meth:`version_data`.

        :returns: the endpoint data for a URL that matches the required version
                  (the format is described in version_data) or None if no
                  match.
        :rtype: dict
        """
        version = normalize_version_number(version)

        # Newest first, so the highest matching version wins.
        for data in self.version_data(reverse=True, **kwargs):
            if version_match(version, data['version']):
                return data

        return None

    def url_for(self, version, **kwargs):
        """Get the endpoint url for a version.

        :param tuple version: The version is always a minimum version in the
            same major release as there should be no compatibility issues with
            using a version newer than the one asked for.
        :param kwargs: Filters forwarded to :meth:`data_for`.

        :returns: The url for the specified version or None if no match.
        :rtype: str
        """
        data = self.data_for(version, **kwargs)
        return data['url'] if data else None


class EndpointData(object):
    """Normalized information about a discovered endpoint.

    Contains url, version, microversion, interface and region information.
    This is essentially the data contained in the catalog and the version
    discovery documents about an endpoint that is used to select the endpoint
    desired by the user. It is returned so that a user can know which qualities
    a discovered endpoint had, in case their request allowed for a range of
    possibilities.
    """

    @positional()
    def __init__(self,
                 catalog_url=None,
                 service_url=None,
                 service_type=None,
                 service_name=None,
                 service_id=None,
                 region_name=None,
                 interface=None,
                 endpoint_id=None,
                 raw_endpoint=None,
                 api_version=None,
                 major_version=None,
                 min_microversion=None,
                 max_microversion=None):
        """Store the supplied endpoint attributes on the instance."""
        self.catalog_url = catalog_url
        self.service_url = service_url
        self.service_type = service_type
        self.service_name = service_name
        self.service_id = service_id
        self.interface = interface
        self.region_name = region_name
        self.endpoint_id = endpoint_id
        self.raw_endpoint = raw_endpoint
        self.api_version = api_version
        self.major_version = major_version
        self.min_microversion = min_microversion
        self.max_microversion = max_microversion
        # Scratch state filled in by _get_url_choices during discovery: the
        # trailing project id path segment removed from the url (if any), and
        # whether the version embedded in the url satisfied the request.
        self._saved_project_id = None
        self._catalog_matches_version = False

    def __copy__(self):
        """Return a new EndpointData based on this one.

        Only the public attributes are carried over; the private discovery
        scratch state starts out fresh on the copy.
        """
        return EndpointData(
            catalog_url=self.catalog_url,
            service_url=self.service_url,
            service_type=self.service_type,
            service_name=self.service_name,
            service_id=self.service_id,
            region_name=self.region_name,
            interface=self.interface,
            endpoint_id=self.endpoint_id,
            raw_endpoint=self.raw_endpoint,
            api_version=self.api_version,
            major_version=self.major_version,
            min_microversion=self.min_microversion,
            max_microversion=self.max_microversion)

    @property
    def url(self):
        """The service url if one is set, otherwise the catalog url."""
        return self.service_url or self.catalog_url

    @positional(3)
    def get_versioned_data(self, session, version,
                           authenticated=False, allow=None, cache=None,
                           allow_version_hack=True, project_id=None):
        """Run version discovery for the service described.

        Performs Version Discovery and returns a new EndpointData object with
        information found.

        :param session: A session object that can be used for communication.
        :type session: keystoneauth1.session.Session
        :param tuple version: The minimum major version required for this
                              endpoint.
        :param string project_id: ID of the currently scoped project. Used for
                                  removing project_id components of URLs from
                                  the catalog. (optional)
        :param dict allow: Extra filters to pass when discovering API
                           versions. (optional)
        :param bool allow_version_hack: Allow keystoneauth to hack up catalog
                                        URLS to support older schemes.
                                        (optional, default True)
        :param dict cache: A dict to be used for caching results in
                           addition to caching them on the Session.
                           (optional)
        :param bool authenticated: Include a token in the discovery call.
                                   (optional) Defaults to False.

        :raises keystoneauth1.exceptions.http.HttpError: An error from an
                                                         invalid HTTP response.
        :raises keystoneauth1.exceptions.DiscoveryFailure:
            if no discovery document could be found while version hacks are
            disallowed, or the requested version is not in the document.

        :returns: A copy of this object, updated with the discovered
                  information when a version was requested.
        :rtype: EndpointData
        """
        if not allow:
            allow = {}

        # This method should always return a new EndpointData
        new_data = copy.copy(self)

        if not version:
            # NOTE(jamielennox): This may not be the best thing to default to
            # but is here for backwards compatibility. It may be worth
            # defaulting to the most recent version.
            return new_data

        new_data._set_version_info(
            session=session, version=version, authenticated=authenticated,
            allow=allow, cache=cache, allow_version_hack=allow_version_hack,
            project_id=project_id)
        return new_data

    def _set_version_info(self, session, version,
                          authenticated=False, allow=None, cache=None,
                          allow_version_hack=True, project_id=None):
        """Run discovery and update this object in place with the results.

        Each candidate url from ``_get_url_choices`` is tried in turn until a
        discovery document is obtained. On success ``service_url`` and the
        microversion bounds are set from the matching version entry.

        :raises keystoneauth1.exceptions.DiscoveryFailure:
            if no document was found and ``allow_version_hack`` is False, or
            the document does not contain a match for ``version``.
        """
        if project_id:
            self.project_id = project_id

        disc = None
        vers_url = None
        tried = set()
        for vers_url in self._get_url_choices(version, project_id,
                                              allow_version_hack):

            if vers_url in tried:
                continue
            # NOTE: this must be add(), not update(). update() would insert
            # the individual characters of the url into the set, so the
            # membership test above would never recognise a repeated url.
            tried.add(vers_url)
            try:
                disc = get_discovery(session, vers_url,
                                     cache=cache,
                                     authenticated=False)
                break
            except (exceptions.DiscoveryFailure,
                    exceptions.HttpError,
                    exceptions.ConnectionError):
                continue
        if not disc:
            # We couldn't find a version discovery document anywhere.
            if self._catalog_matches_version:
                # But - the version in the catalog is fine.
                self.service_url = self.catalog_url
                return

            # NOTE(jamielennox): The logic here is required for backwards
            # compatibility. By itself it is not ideal.
            if allow_version_hack:
                # NOTE(jamielennox): If we can't contact the server we
                # fall back to just returning the URL from the catalog.  This
                # is backwards compatible behaviour and used when there is no
                # other choice. Realistically if you have provided a version
                # you should be able to rely on that version being returned or
                # the request failing.
                _LOGGER.warning(
                    'Failed to contact the endpoint at %s for '
                    'discovery. Fallback to using that endpoint as '
                    'the base url.', self.url)
                return

            else:
                # NOTE(jamielennox): If you've said no to allow_version_hack
                # and we can't determine the actual URL this is a failure
                # because we are specifying that the deployment must be up to
                # date enough to properly specify a version and keystoneauth
                # can't deliver.
                raise exceptions.DiscoveryFailure(
                    "Version requested but version discovery document was not"
                    " found and allow_version_hack was False")

        discovered_data = disc.data_for(version, **allow)
        if not discovered_data:
            raise exceptions.DiscoveryFailure(
                "Version {version} requested, but was not found".format(
                    version=version_to_string(version)))

        self.min_microversion = discovered_data['min_microversion']
        self.max_microversion = discovered_data['max_microversion']

        discovered_url = discovered_data['url']

        # NOTE(jamielennox): urljoin allows the url to be relative or even
        # protocol-less. The additional trailing '/' make urljoin respect
        # the current path as canonical even if the url doesn't include it.
        # for example a "v2" path from http://host/admin should resolve as
        # http://host/admin/v2 where it would otherwise be host/v2.
        # This has no effect on absolute urls returned from url_for.
        url = urllib.parse.urljoin(vers_url.rstrip('/') + '/', discovered_url)

        # If we had to pop a project_id from the catalog_url, put it back on
        if self._saved_project_id:
            url = urllib.parse.urljoin(url.rstrip('/') + '/',
                                       self._saved_project_id)
        self.service_url = url

    def _get_url_choices(self, version, project_id, allow_version_hack=True):
        """Yield candidate urls to attempt discovery against, best first.

        As a side effect this records on the instance the project id segment
        stripped from the url and whether the url's own version satisfies
        ``version``.
        """
        if allow_version_hack:
            url = urllib.parse.urlparse(self.url)
            url_parts = url.path.split('/')

            # First, check to see if the catalog url ends with a project id
            # We need to remove it and save it for later if it does
            if project_id and url_parts[-1].endswith(project_id):
                self._saved_project_id = url_parts.pop()

            # Next, check to see if the url indicates a version and if that
            # version matches our request. If so, we can start by trying
            # the given url as it has a high potential for success
            url_version = None
            if url_parts[-1].startswith('v'):
                try:
                    url_version = normalize_version_number(url_parts[-1])
                except TypeError:
                    pass
            if url_version:
                if version_match(version, url_version):
                    self._catalog_matches_version = True
                    # This endpoint matches the version request, try it first
                    yield urllib.parse.ParseResult(
                        url.scheme,
                        url.netloc,
                        '/'.join(url_parts),
                        url.params,
                        url.query,
                        url.fragment).geturl()
                url_parts.pop()

            # If there were projects or versions in the url they are now gone.
            # That means we're left with the unversioned url
            yield urllib.parse.ParseResult(
                url.scheme,
                url.netloc,
                '/'.join(url_parts),
                url.params,
                url.query,
                url.fragment).geturl()

            # NOTE(mordred): For backwards compatibility people might have
            # added version hacks using the version hack system. The logic
            # above should handle most cases, so by the time we get here it's
            # most likely to be a no-op
            yield self._get_catalog_discover_hack()

        # As a final fallthrough case, add the url from the catalog. If hacks
        # are turned off, this will be the only choice.
        yield self.catalog_url

    def _get_catalog_discover_hack(self):
        """Apply the catalog hacks and figure out an unversioned endpoint.

        This function is internal to keystoneauth1. It looks up the rewrite
        rules registered for this endpoint's service type and applies them to
        the current url.

        :returns: A potential unversioned url
        """
        return _VERSION_HACKS.get_discover_hack(self.service_type, self.url)


@positional()
def get_discovery(session, url, cache=None, authenticated=False):
    """Return the discovery object for a URL.

    Looks through the available caches for a discovery object already stored
    under ``url``. If none of them has one, a new discovery object is created.
    Whichever way it was obtained, the object is then written into every
    cache that was consulted before being returned.

    NOTE: This function is intended for use by keystoneauth itself. Callers
    who nonetheless need to perform discovery directly should go through this
    function so that the discovery caching is used.

    :param session: A session object to discover with.
    :type session: keystoneauth1.session.Session
    :param str url: The url to lookup.
    :param dict cache:
        A dict to be used for caching results, in addition to caching them
        on the Session. (optional) Defaults to None.
    :param bool authenticated:
        Handed to the discovery object when a new one has to be created.
        (optional) Defaults to False.

    :raises keystoneauth1.exceptions.discovery.DiscoveryFailure:
        if for some reason the lookup fails.
    :raises keystoneauth1.exceptions.http.HttpError:
        An error from an invalid HTTP response.

    :returns: A discovery object with the results of looking up that URL.
    :rtype: :py:class:`keystoneauth1.discover.Discover`
    """
    # Up to three caches take part, consulted in this order:
    #   1. the one handed in by the caller, if any;
    #   2. the one on the session, if it has one;
    #   3. the one on the session's auth plugin, if there is a plugin and it
    #      has one.
    caches = []
    if cache is not None:
        caches.append(cache)

    if hasattr(session, '_discovery_cache'):
        caches.append(session._discovery_cache)

    if session.auth and hasattr(session.auth, '_discovery_cache'):
        caches.append(session.auth._discovery_cache)

    # Take the first cached entry that is set, stopping at the first hit so
    # later caches are not queried needlessly.
    lookups = (store.get(url) for store in caches)
    disc = next((found for found in lookups if found), None)

    if not disc:
        # Nothing cached anywhere: go and fetch it.
        disc = Discover(session, url, authenticated=authenticated)

    # Fetched or cached, propagate the object to all of the caches. This
    # ensures that combining sessions and auth plugins doesn't lead to
    # unnecessary calls.
    if disc:
        for store in caches:
            store[url] = disc

    return disc


class _VersionHacks(object):
    """Registry of url rewrite rules, grouped by service type.

    A plain dictionary would do the job; wrapping it in a class keeps it
    easier to test.
    """

    def __init__(self):
        # Maps service_type -> list of (pattern, replacement) pairs, kept in
        # the order they were registered.
        self._discovery_data = {}

    def add_discover_hack(self, service_type, old, new=''):
        """Register a rewrite rule for a service type.

        :param str service_type: The service_type in the catalog.
        :param re.RegexObject old: The pattern to use.
        :param str new: What to replace the pattern with.
        """
        rule = (old, new)
        self._discovery_data.setdefault(service_type, []).append(rule)

    def get_discover_hack(self, service_type, url):
        """Rewrite a catalog url using the rules for its service type.

        Rules are tried in registration order and the first one whose pattern
        matches anywhere in the url decides the result.

        :param str service_type: the service_type to look up.
        :param str url: The original url that came from a service_catalog.

        :returns: Either the unversioned url or the one from the catalog
                  to try.
        """
        rules = self._discovery_data.get(service_type, [])
        for pattern, replacement in rules:
            rewritten, hits = pattern.subn(replacement, url)
            if hits > 0:
                return rewritten

        # No rule applied: hand the url back untouched.
        return url


# Module-wide registry of catalog url rewrite rules, consulted by
# EndpointData._get_catalog_discover_hack and extended through
# add_catalog_discover_hack.
_VERSION_HACKS = _VersionHacks()
# Built-in rule: strip a trailing /v2.0 (with or without a final slash) from
# identity urls. The pattern is a raw string with the dot escaped so that only
# a literal "v2.0" matches, not e.g. "v2x0".
_VERSION_HACKS.add_discover_hack('identity', re.compile(r'/v2\.0/?$'), '/')


def add_catalog_discover_hack(service_type, old, new):
    """Add a version removal rule for a particular service.

    Originally deployments of OpenStack would contain a versioned endpoint in
    the catalog for different services. E.g. an identity service might look
    like ``http://localhost:5000/v2.0``. This is a problem when we want to use
    a different version like v3.0 as there is no way to tell where it is
    located. We cannot simply change all service catalogs either so there must
    be a way to handle the older style of catalog.

    This function adds a rule for a given service type that if part of the URL
    matches a given regular expression in *old* then it will be replaced with
    the *new* value. This will replace all instances of old with new. It should
    therefore contain a regex anchor.

    For example the included rule is equivalent to::

        add_catalog_discover_hack('identity', re.compile('/v2.0/?$'), '/')

    so if the catalog retrieves an *identity* URL that ends with /v2.0 or
    /v2.0/ then it should replace it simply with / to fix the user's catalog.

    The rule is appended to the module-wide rule registry; rules registered
    earlier for the same service type are tried first.

    :param str service_type: The service type as defined in the catalog that
                             the rule will apply to.
    :param re.RegexObject old: The regular expression to search for and replace
                               if found.
    :param str new: The new string to replace the pattern with.
    """
    _VERSION_HACKS.add_discover_hack(service_type, old, new)