From 2c4e606b350b38e561c94f6f3eafbaa720081266 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 4 Mar 2017 14:14:29 -0600 Subject: [PATCH 01/34] Start working towards 1.0 --- setup.py | 26 ++++++++----------- {rfc3986 => src/rfc3986}/__init__.py | 0 {rfc3986 => src/rfc3986}/api.py | 0 {rfc3986 => src/rfc3986}/compat.py | 0 {rfc3986 => src/rfc3986}/exceptions.py | 0 {rfc3986 => src/rfc3986}/misc.py | 0 {rfc3986 => src/rfc3986}/normalizers.py | 0 {rfc3986 => src/rfc3986}/parseresult.py | 0 {rfc3986 => src/rfc3986}/uri.py | 0 tox.ini | 34 +++++++++++++++++-------- 10 files changed, 35 insertions(+), 25 deletions(-) rename {rfc3986 => src/rfc3986}/__init__.py (100%) rename {rfc3986 => src/rfc3986}/api.py (100%) rename {rfc3986 => src/rfc3986}/compat.py (100%) rename {rfc3986 => src/rfc3986}/exceptions.py (100%) rename {rfc3986 => src/rfc3986}/misc.py (100%) rename {rfc3986 => src/rfc3986}/normalizers.py (100%) rename {rfc3986 => src/rfc3986}/parseresult.py (100%) rename {rfc3986 => src/rfc3986}/uri.py (100%) diff --git a/setup.py b/setup.py index 10ce851..b1173ea 100755 --- a/setup.py +++ b/setup.py @@ -1,20 +1,14 @@ -#!/usr/bin/env python - +"""Packaging logic for the rfc3986 library.""" import io import os import sys +import setuptools + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) # noqa + import rfc3986 -try: - from setuptools import setup -except ImportError: - from distutils.core import setup - -if sys.argv[-1] == 'publish': - os.system('python setup.py bdist_wheel sdist upload') - sys.exit() - packages = [ 'rfc3986', ] @@ -25,15 +19,16 @@ with io.open('README.rst', encoding='utf-8') as f: with io.open('HISTORY.rst', encoding='utf-8') as f: history = f.read() -setup( +setuptools.setup( name='rfc3986', version=rfc3986.__version__, description='Validating URI References per RFC 3986', long_description=readme + '\n\n' + history, author='Ian Cordasco', - author_email='ian.cordasco@rackspace.com', - url='https://rfc3986.readthedocs.org', + 
author_email='graffatcolmingov@gmail.com', + url='http://rfc3986.readthedocs.io', packages=packages, + package_dir={'': 'src/'}, package_data={'': ['LICENSE']}, include_package_data=True, license='Apache 2.0', @@ -43,10 +38,11 @@ setup( 'Natural Language :: English', 'License :: OSI Approved :: Apache Software License', 'Programming Language :: Python', - 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', ), ) diff --git a/rfc3986/__init__.py b/src/rfc3986/__init__.py similarity index 100% rename from rfc3986/__init__.py rename to src/rfc3986/__init__.py diff --git a/rfc3986/api.py b/src/rfc3986/api.py similarity index 100% rename from rfc3986/api.py rename to src/rfc3986/api.py diff --git a/rfc3986/compat.py b/src/rfc3986/compat.py similarity index 100% rename from rfc3986/compat.py rename to src/rfc3986/compat.py diff --git a/rfc3986/exceptions.py b/src/rfc3986/exceptions.py similarity index 100% rename from rfc3986/exceptions.py rename to src/rfc3986/exceptions.py diff --git a/rfc3986/misc.py b/src/rfc3986/misc.py similarity index 100% rename from rfc3986/misc.py rename to src/rfc3986/misc.py diff --git a/rfc3986/normalizers.py b/src/rfc3986/normalizers.py similarity index 100% rename from rfc3986/normalizers.py rename to src/rfc3986/normalizers.py diff --git a/rfc3986/parseresult.py b/src/rfc3986/parseresult.py similarity index 100% rename from rfc3986/parseresult.py rename to src/rfc3986/parseresult.py diff --git a/rfc3986/uri.py b/src/rfc3986/uri.py similarity index 100% rename from rfc3986/uri.py rename to src/rfc3986/uri.py diff --git a/tox.ini b/tox.ini index 0a94d31..e579c75 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py26,py27,py32,py33,py34,pypy,{py27,py34}-flake8 +envlist = 
py27,py33,py34,py35,py36,pypy,flake8 [testenv] pip_pre = False @@ -12,24 +12,26 @@ commands = deps = {[testenv]deps} commands = py.test {posargs} -[testenv:py27-flake8] -basepython = python2.7 +[testenv:flake8] +basepython = python3 deps = flake8 -commands = flake8 {posargs} rfc3986 + flake8-docstrings + flake8-import-order +commands = flake8 {posargs} src/rfc3986 -[testenv:py34-flake8] -basepython = python3.4 +[testenv:build] deps = - flake8 -commands = flake8 {posargs} rfc3986 + wheel +commands = + python setup.py sdist bdist_wheel [testenv:release] deps = - wheel + {[testenv:build]deps} twine>=1.4.0 commands = - python setup.py sdist bdist_wheel + {[testenv:build]commands} twine upload {posargs:--skip-existing dist/*} [testenv:docs] @@ -47,3 +49,15 @@ commands = [pytest] addopts = -q norecursedirs = *.egg .git .* _* + +[flake8] +exclude = + .tox, + .git, + __pycache__, + *.pyc, + *.egg-info, + .cache, + .eggs +max-complexity = 10 +import-order-style = google From 08335d75b5cc7f73771385d7dec0df112b95a33d Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 4 Mar 2017 19:28:48 -0600 Subject: [PATCH 02/34] Fix up flake8 errors --- src/rfc3986/__init__.py | 15 +-- src/rfc3986/api.py | 9 +- src/rfc3986/compat.py | 3 + src/rfc3986/exceptions.py | 14 +++ src/rfc3986/misc.py | 3 +- src/rfc3986/normalizers.py | 19 +++- src/rfc3986/parseresult.py | 14 +++ src/rfc3986/uri.py | 195 ++++++++++++++++++++++++++----------- 8 files changed, 195 insertions(+), 77 deletions(-) diff --git a/src/rfc3986/__init__.py b/src/rfc3986/__init__.py index e32520e..89e7b97 100644 --- a/src/rfc3986/__init__.py +++ b/src/rfc3986/__init__.py @@ -14,18 +14,19 @@ # limitations under the License. """ -rfc3986 -======= +An implementation of semantics and validations described in RFC 3986. -An implementation of semantics and validations described in RFC 3986. See -http://rfc3986.rtfd.org/ for documentation. +See http://rfc3986.readthedocs.io/ for detailed documentation. 
:copyright: (c) 2014 Rackspace :license: Apache v2.0, see LICENSE for details """ -from .api import (URIReference, uri_reference, is_valid_uri, normalize_uri, - urlparse) +from .api import is_valid_uri +from .api import normalize_uri +from .api import uri_reference +from .api import URIReference +from .api import urlparse from .parseresult import ParseResult __title__ = 'rfc3986' @@ -33,7 +34,7 @@ __author__ = 'Ian Cordasco' __author_email__ = 'graffatcolmingov@gmail.com' __license__ = 'Apache v2.0' __copyright__ = 'Copyright 2014 Rackspace' -__version__ = '0.4.1' +__version__ = '1.0.0.0b0' __all__ = ( 'ParseResult', diff --git a/src/rfc3986/api.py b/src/rfc3986/api.py index 3e9e401..17f4daf 100644 --- a/src/rfc3986/api.py +++ b/src/rfc3986/api.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -rfc3986.api -~~~~~~~~~~~ +Module containing the simple and functional API for rfc3986. -This defines the simple API to rfc3986. This module defines 3 functions and -provides access to the class ``URIReference``. +This module defines functions and provides access to the public attributes +and classes of rfc3986. """ -from .uri import URIReference from .parseresult import ParseResult +from .uri import URIReference def uri_reference(uri, encoding='utf-8'): diff --git a/src/rfc3986/compat.py b/src/rfc3986/compat.py index 6fc7f6d..9888e23 100644 --- a/src/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -12,6 +12,7 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. 
+"""Compatibility module for Python 2 and 3 support.""" import sys @@ -20,12 +21,14 @@ if sys.version_info >= (3, 0): def to_str(b, encoding): + """Ensure that b is text in the specified encoding.""" if hasattr(b, 'decode') and not isinstance(b, unicode): b = b.decode('utf-8') return b def to_bytes(s, encoding): + """Ensure that s is converted to bytes from the encoding.""" if hasattr(s, 'encode') and not isinstance(s, bytes): s = s.encode('utf-8') return s diff --git a/src/rfc3986/exceptions.py b/src/rfc3986/exceptions.py index f9adbde..f0f84d9 100644 --- a/src/rfc3986/exceptions.py +++ b/src/rfc3986/exceptions.py @@ -1,21 +1,35 @@ # -*- coding: utf-8 -*- +"""Exceptions module for rfc3986.""" + + class RFC3986Exception(Exception): + """Base class for all rfc3986 exception classes.""" + pass class InvalidAuthority(RFC3986Exception): + """Exception when the authority string is invalid.""" + def __init__(self, authority): + """Initialize the exception with the invalid authority.""" super(InvalidAuthority, self).__init__( "The authority ({0}) is not valid.".format(authority)) class InvalidPort(RFC3986Exception): + """Exception when the port is invalid.""" + def __init__(self, port): + """Initialize the exception with the invalid port.""" super(InvalidPort, self).__init__( 'The port ("{0}") is not valid.'.format(port)) class ResolutionError(RFC3986Exception): + """Exception to indicate a failure to resolve a URI.""" + def __init__(self, uri): + """Initialize the error with the failed URI.""" super(ResolutionError, self).__init__( "{0} is not an absolute URI.".format(uri.unsplit())) diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index bb2ed82..83c98b1 100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -13,8 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -rfc3986.misc -~~~~~~~~~~~~ +Module containing compiled regular expressions and constants. 
This module contains important constants, patterns, and compiled regular expressions for parsing and validating URIs and their components. diff --git a/src/rfc3986/normalizers.py b/src/rfc3986/normalizers.py index 9e0812e..e497c58 100644 --- a/src/rfc3986/normalizers.py +++ b/src/rfc3986/normalizers.py @@ -12,17 +12,20 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Module with functions to normalize components.""" import re -from .compat import to_bytes -from .misc import NON_PCT_ENCODED +from . import compat +from . import misc def normalize_scheme(scheme): + """Normalize the scheme component.""" return scheme.lower() def normalize_authority(authority): + """Normalize an authority tuple to a string.""" userinfo, host, port = authority result = '' if userinfo: @@ -35,6 +38,7 @@ def normalize_authority(authority): def normalize_path(path): + """Normalize the path string.""" if not path: return path @@ -43,12 +47,14 @@ def normalize_path(path): def normalize_query(query): + """Normalize the query string.""" if not query: return query return normalize_percent_characters(query) def normalize_fragment(fragment): + """Normalize the fragment string.""" if not fragment: return fragment return normalize_percent_characters(fragment) @@ -70,6 +76,10 @@ def normalize_percent_characters(s): def remove_dot_segments(s): + """Remove dot segments from the string. + + See also Section 5.2.4 of :rfc:`3986`. 
+ """ # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code segments = s.split('/') # Turn the path into a list of segments output = [] # Initialize the variable to use to store output @@ -100,10 +110,11 @@ def remove_dot_segments(s): def encode_component(uri_component, encoding): + """Encode the specific component in the provided encoding.""" if uri_component is None: return uri_component - uri_bytes = to_bytes(uri_component, encoding) + uri_bytes = compat.to_bytes(uri_component, encoding) encoded_uri = bytearray() @@ -111,7 +122,7 @@ def encode_component(uri_component, encoding): # Will return a single character bytestring on both Python 2 & 3 byte = uri_bytes[i:i+1] byte_ord = ord(byte) - if byte_ord < 128 and byte.decode() in NON_PCT_ENCODED: + if byte_ord < 128 and byte.decode() in misc.NON_PCT_ENCODED: encoded_uri.extend(byte) continue encoded_uri.extend('%{0:02x}'.format(byte_ord).encode()) diff --git a/src/rfc3986/parseresult.py b/src/rfc3986/parseresult.py index 689ab1c..dc9d4d5 100644 --- a/src/rfc3986/parseresult.py +++ b/src/rfc3986/parseresult.py @@ -12,6 +12,7 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Module containing the urlparse compatibility logic.""" from collections import namedtuple from . import compat @@ -65,10 +66,17 @@ class ParseResultMixin(object): class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), ParseResultMixin): + """Implementation of urlparse compatibility class. + + This uses the URIReference logic to handle compatibility with the + urlparse.ParseResult class. 
+ """ + slots = () def __new__(cls, scheme, userinfo, host, port, path, query, fragment, uri_ref, encoding='utf-8'): + """Create a new ParseResult.""" parse_result = super(ParseResult, cls).__new__( cls, scheme or None, @@ -144,6 +152,7 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), def copy_with(self, scheme=None, userinfo=None, host=None, port=None, path=None, query=None, fragment=None): + """Create a copy of this instance replacing with specified parts.""" attributes = zip(PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment)) attrs_dict = {} @@ -160,6 +169,7 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict) def encode(self, encoding=None): + """Convert to an instance of ParseResultBytes.""" encoding = encoding or self.encoding attrs = dict( zip(PARSED_COMPONENTS, @@ -187,8 +197,11 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS), ParseResultMixin): + """Compatibility shim for the urlparse.ParseResultBytes object.""" + def __new__(cls, scheme, userinfo, host, port, path, query, fragment, uri_ref, encoding='utf-8', lazy_normalize=True): + """Create a new ParseResultBytes instance.""" parse_result = super(ParseResultBytes, cls).__new__( cls, scheme or None, @@ -272,6 +285,7 @@ class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS), def copy_with(self, scheme=None, userinfo=None, host=None, port=None, path=None, query=None, fragment=None, lazy_normalize=True): + """Create a copy of this instance replacing with specified parts.""" attributes = zip(PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment)) attrs_dict = {} diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index 2a86c6b..2dd183d 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -1,3 +1,4 @@ +"""Module containing the implementation of the 
URIReference class.""" # -*- coding: utf-8 -*- # Copyright (c) 2014 Rackspace # Copyright (c) 2015 Ian Cordasco @@ -15,24 +16,75 @@ # limitations under the License. from collections import namedtuple -from .compat import to_str -from .exceptions import InvalidAuthority, ResolutionError -from .misc import ( - ABSOLUTE_URI_MATCHER, FRAGMENT_MATCHER, IPv4_MATCHER, PATH_MATCHER, - QUERY_MATCHER, SCHEME_MATCHER, SUBAUTHORITY_MATCHER, URI_MATCHER, - URI_COMPONENTS, merge_paths - ) -from .normalizers import ( - encode_component, normalize_scheme, normalize_authority, normalize_path, - normalize_query, normalize_fragment - ) +from . import compat +from . import exceptions as exc +from . import misc +from . import normalizers -class URIReference(namedtuple('URIReference', URI_COMPONENTS)): +class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): + """Immutable object representing a parsed URI Reference. + + .. note:: + + This class is not intended to be directly instantiated by the user. + + This object exposes attributes for the following components of a + URI: + + - scheme + - authority + - path + - query + - fragment + + .. attribute:: scheme + + The scheme that was parsed for the URI Reference. For example, + ``http``, ``https``, ``smtp``, ``imap``, etc. + + .. attribute:: authority + + Component of the URI that contains the user information, host, + and port sub-components. For example, + ``google.com``, ``127.0.0.1:5000``, ``username@[::1]``, + ``username:password@example.com:443``, etc. + + .. attribute:: path + + The path that was parsed for the given URI Reference. For example, + ``/``, ``/index.php``, etc. + + .. attribute:: query + + The query component for a given URI Reference. For example, ``a=b``, + ``a=b%20c``, ``a=b+c``, ``a=b,c=d,e=%20f``, etc. + + .. attribute:: fragment + + The fragment component of a URI. For example, ``section-3.1``. 
+ + This class also provides extra attributes for easier access to information + like the subcomponents of the authority component. + + .. attribute:: userinfo + + The user information parsed from the authority. + + .. attribute:: host + + The hostname, IPv4, or IPv6 address parsed from the authority. + + .. attribute:: port + + The port parsed from the authority. + """ + slots = () def __new__(cls, scheme, authority, path, query, fragment, encoding='utf-8'): + """Create a new URIReference.""" ref = super(URIReference, cls).__new__( cls, scheme or None, @@ -44,6 +96,7 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): return ref def __eq__(self, other): + """Compare this reference to another.""" other_ref = other if isinstance(other, tuple): other_ref = URIReference(*other) @@ -67,49 +120,52 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): :param str encoding: The encoding of the string provided :returns: :class:`URIReference` or subclass thereof """ - uri_string = to_str(uri_string, encoding) + uri_string = compat.to_str(uri_string, encoding) - split_uri = URI_MATCHER.match(uri_string).groupdict() - return cls(split_uri['scheme'], split_uri['authority'], - encode_component(split_uri['path'], encoding), - encode_component(split_uri['query'], encoding), - encode_component(split_uri['fragment'], encoding), encoding) + split_uri = misc.URI_MATCHER.match(uri_string).groupdict() + return cls( + split_uri['scheme'], split_uri['authority'], + normalizers.encode_component(split_uri['path'], encoding), + normalizers.encode_component(split_uri['query'], encoding), + normalizers.encode_component(split_uri['fragment'], encoding), + encoding, + ) def authority_info(self): - """Returns a dictionary with the ``userinfo``, ``host``, and ``port``. + """Return a dictionary with the ``userinfo``, ``host``, and ``port``. - If the authority is not valid, it will raise a ``InvalidAuthority`` - Exception. 
+ If the authority is not valid, it will raise a + :class:`~rfc3986.exceptions.InvalidAuthority` Exception. :returns: ``{'userinfo': 'username:password', 'host': 'www.example.com', 'port': '80'}`` :rtype: dict - :raises InvalidAuthority: If the authority is not ``None`` and can not - be parsed. + :raises rfc3986.exceptions.InvalidAuthority: + If the authority is not ``None`` and can not be parsed. """ if not self.authority: return {'userinfo': None, 'host': None, 'port': None} - match = SUBAUTHORITY_MATCHER.match(self.authority) + match = misc.SUBAUTHORITY_MATCHER.match(self.authority) if match is None: # In this case, we have an authority that was parsed from the URI # Reference, but it cannot be further parsed by our - # SUBAUTHORITY_MATCHER. In this case it must not be a valid + # misc.SUBAUTHORITY_MATCHER. In this case it must not be a valid # authority. - raise InvalidAuthority(self.authority.encode(self.encoding)) + raise exc.InvalidAuthority(self.authority.encode(self.encoding)) # We had a match, now let's ensure that it is actually a valid host # address if it is IPv4 matches = match.groupdict() host = matches.get('host') - if (host and IPv4_MATCHER.match(host) and not + if (host and misc.IPv4_MATCHER.match(host) and not valid_ipv4_host_address(host)): # If we have a host, it appears to be IPv4 and it does not have # valid bytes, it is an InvalidAuthority. 
- raise InvalidAuthority(self.authority.encode(self.encoding)) + raise exc.InvalidAuthority(self.authority.encode(self.encoding)) return matches @@ -118,16 +174,16 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): """If present, a string representing the host.""" try: authority = self.authority_info() - except InvalidAuthority: + except exc.InvalidAuthority: return None return authority['host'] @property def port(self): - """If present, the port (as a string) extracted from the authority.""" + """If present, the port extracted from the authority.""" try: authority = self.authority_info() - except InvalidAuthority: + except exc.InvalidAuthority: return None return authority['port'] @@ -136,7 +192,7 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): """If present, the userinfo extracted from the authority.""" try: authority = self.authority_info() - except InvalidAuthority: + except exc.InvalidAuthority: return None return authority['userinfo'] @@ -148,10 +204,10 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): :returns: ``True`` if it is an absolute URI, ``False`` otherwise. :rtype: bool """ - return bool(ABSOLUTE_URI_MATCHER.match(self.unsplit())) + return bool(misc.ABSOLUTE_URI_MATCHER.match(self.unsplit())) def is_valid(self, **kwargs): - """Determines if the URI is valid. + """Determine if the URI is valid. :param bool require_scheme: Set to ``True`` if you wish to require the presence of the scheme component. @@ -184,7 +240,7 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): return value is None or matcher.match(value) def authority_is_valid(self, require=False): - """Determines if the authority component is valid. + """Determine if the authority component is valid. :param str require: Set to ``True`` to require the presence of this component. 
@@ -193,15 +249,15 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): """ try: self.authority_info() - except InvalidAuthority: + except exc.InvalidAuthority: return False is_valid = self._is_valid(self.authority, - SUBAUTHORITY_MATCHER, + misc.SUBAUTHORITY_MATCHER, require) # Ensure that IPv4 addresses have valid bytes - if is_valid and self.host and IPv4_MATCHER.match(self.host): + if is_valid and self.host and misc.IPv4_MATCHER.match(self.host): return valid_ipv4_host_address(self.host) # Perhaps the host didn't exist or if it did, it wasn't an IPv4-like @@ -210,47 +266,47 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): return is_valid def scheme_is_valid(self, require=False): - """Determines if the scheme component is valid. + """Determine if the scheme component is valid. :param str require: Set to ``True`` to require the presence of this component. :returns: ``True`` if the scheme is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.scheme, SCHEME_MATCHER, require) + return self._is_valid(self.scheme, misc.SCHEME_MATCHER, require) def path_is_valid(self, require=False): - """Determines if the path component is valid. + """Determine if the path component is valid. :param str require: Set to ``True`` to require the presence of this component. :returns: ``True`` if the path is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.path, PATH_MATCHER, require) + return self._is_valid(self.path, misc.PATH_MATCHER, require) def query_is_valid(self, require=False): - """Determines if the query component is valid. + """Determine if the query component is valid. :param str require: Set to ``True`` to require the presence of this component. :returns: ``True`` if the query is valid. ``False`` otherwise. 
:rtype: bool """ - return self._is_valid(self.query, QUERY_MATCHER, require) + return self._is_valid(self.query, misc.QUERY_MATCHER, require) def fragment_is_valid(self, require=False): - """Determines if the fragment component is valid. + """Determine if the fragment component is valid. :param str require: Set to ``True`` to require the presence of this component. :returns: ``True`` if the fragment is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.fragment, FRAGMENT_MATCHER, require) + return self._is_valid(self.fragment, misc.FRAGMENT_MATCHER, require) def normalize(self): - """Normalize this reference as described in Section 6.2.2 + """Normalize this reference as described in Section 6.2.2. This is not an in-place normalization. Instead this creates a new URIReference. @@ -260,12 +316,12 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): """ # See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in # this method. - return URIReference(normalize_scheme(self.scheme or ''), - normalize_authority( + return URIReference(normalizers.normalize_scheme(self.scheme or ''), + normalizers.normalize_authority( (self.userinfo, self.host, self.port)), - normalize_path(self.path or ''), - normalize_query(self.query), - normalize_fragment(self.fragment), + normalizers.normalize_path(self.path or ''), + normalizers.normalize_query(self.query), + normalizers.normalize_fragment(self.fragment), self.encoding) def normalized_equality(self, other_ref): @@ -291,13 +347,14 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): :returns: A new URIReference which is the result of resolving this reference using ``base_uri``. :rtype: :class:`URIReference` - :raises ResolutionError: If the ``base_uri`` is not an absolute URI. + :raises rfc3986.exceptions.ResolutionError: + If the ``base_uri`` is not an absolute URI. 
""" if not isinstance(base_uri, URIReference): base_uri = URIReference.from_string(base_uri) if not base_uri.is_absolute(): - raise ResolutionError(base_uri) + raise exc.ResolutionError(base_uri) # This is optional per # http://tools.ietf.org/html/rfc3986#section-5.2.1 @@ -311,12 +368,14 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): # http://tools.ietf.org/html/rfc3986#page-32 if resolving.scheme is not None: - target = resolving.copy_with(path=normalize_path(resolving.path)) + target = resolving.copy_with( + path=normalizers.normalize_path(resolving.path) + ) else: if resolving.authority is not None: target = resolving.copy_with( scheme=base_uri.scheme, - path=normalize_path(resolving.path) + path=normalizers.normalize_path(resolving.path) ) else: if resolving.path is None: @@ -332,10 +391,10 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): ) else: if resolving.path.startswith('/'): - path = normalize_path(resolving.path) + path = normalizers.normalize_path(resolving.path) else: - path = normalize_path( - merge_paths(base_uri, resolving.path) + path = normalizers.normalize_path( + misc.merge_paths(base_uri, resolving.path) ) target = resolving.copy_with( scheme=base_uri.scheme, @@ -367,6 +426,23 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): def copy_with(self, scheme=None, authority=None, path=None, query=None, fragment=None): + """Create a copy of this reference with the new components. + + :param str scheme: + (optional) The scheme to use for the new reference. + :param str authority: + (optional) The authority to use for the new reference. + :param str path: + (optional) The path to use for the new reference. + :param str query: + (optional) The query to use for the new reference. + :param str fragment: + (optional) The fragment to use for the new reference. + :returns: + New URIReference with provided components. 
+ :rtype: + URIReference + """ attributes = { 'scheme': scheme, 'authority': authority, @@ -383,6 +459,7 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): def valid_ipv4_host_address(host): + """Determine if the given host is a valid IPv4 address.""" # If the host exists, and it might be IPv4, check each byte in the # address. return all([0 <= int(byte, base=10) <= 255 for byte in host.split('.')]) From 06618935b34a5a99caedc6138dbf7439deebb319 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sun, 5 Mar 2017 08:32:13 -0600 Subject: [PATCH 03/34] Move validation logic to separate submodule In preparation for the URIBuilder work, we will need our validators to be separate from our URIReference class. They mostly do not rely on the instance of URIReference already so making them functions in a module makes sense. --- src/rfc3986/uri.py | 53 ++++++----------- src/rfc3986/validators.py | 121 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 36 deletions(-) create mode 100644 src/rfc3986/validators.py diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index 2dd183d..98dc437 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -20,6 +20,7 @@ from . import compat from . import exceptions as exc from . import misc from . import normalizers +from . import validators class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): @@ -162,7 +163,7 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): host = matches.get('host') if (host and misc.IPv4_MATCHER.match(host) and not - valid_ipv4_host_address(host)): + validators.valid_ipv4_host_address(host)): # If we have a host, it appears to be IPv4 and it does not have # valid bytes, it is an InvalidAuthority. 
raise exc.InvalidAuthority(self.authority.encode(self.encoding)) @@ -231,39 +232,26 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): ] return all(v(r) for v, r in validators) - def _is_valid(self, value, matcher, require): - if require: - return (value is not None - and matcher.match(value)) - - # require is False and value is not None - return value is None or matcher.match(value) - def authority_is_valid(self, require=False): """Determine if the authority component is valid. - :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the authority is valid. ``False`` otherwise. - :rtype: bool + :param bool require: + Set to ``True`` to require the presence of this component. + :returns: + ``True`` if the authority is valid. ``False`` otherwise. + :rtype: + bool """ try: self.authority_info() except exc.InvalidAuthority: return False - is_valid = self._is_valid(self.authority, - misc.SUBAUTHORITY_MATCHER, - require) - - # Ensure that IPv4 addresses have valid bytes - if is_valid and self.host and misc.IPv4_MATCHER.match(self.host): - return valid_ipv4_host_address(self.host) - - # Perhaps the host didn't exist or if it did, it wasn't an IPv4-like - # address. In either case, we want to rely on the `_is_valid` check, - # so let's return that. - return is_valid + return validators.authority_is_valid( + self.authority, + host=self.host, + require=require, + ) def scheme_is_valid(self, require=False): """Determine if the scheme component is valid. @@ -273,7 +261,7 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): :returns: ``True`` if the scheme is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.scheme, misc.SCHEME_MATCHER, require) + return validators.scheme_is_valid(self.scheme, require) def path_is_valid(self, require=False): """Determine if the path component is valid. 
@@ -283,7 +271,7 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): :returns: ``True`` if the path is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.path, misc.PATH_MATCHER, require) + return validators.path_is_valid(self.path, require) def query_is_valid(self, require=False): """Determine if the query component is valid. @@ -293,7 +281,7 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): :returns: ``True`` if the query is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.query, misc.QUERY_MATCHER, require) + return validators.query_is_valid(self.query, require) def fragment_is_valid(self, require=False): """Determine if the fragment component is valid. @@ -303,7 +291,7 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): :returns: ``True`` if the fragment is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.fragment, misc.FRAGMENT_MATCHER, require) + return validators.fragment_is_valid(self.fragment, require) def normalize(self): """Normalize this reference as described in Section 6.2.2. @@ -456,10 +444,3 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): uri = self._replace(**attributes) uri.encoding = self.encoding return uri - - -def valid_ipv4_host_address(host): - """Determine if the given host is a valid IPv4 address.""" - # If the host exists, and it might be IPv4, check each byte in the - # address. - return all([0 <= int(byte, base=10) <= 255 for byte in host.split('.')]) diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py new file mode 100644 index 0000000..e18681a --- /dev/null +++ b/src/rfc3986/validators.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2017 Ian Cordasco +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing the validation logic for rfc3986.""" +from . import misc + + +def is_valid(value, matcher, require): + """Determine if a value is valid based on the provided matcher. + + :param str value: + Value to validate. + :param matcher: + Compiled regular expression to use to validate the value. + :param require: + Whether or not the value is required. + """ + if require: + return (value is not None + and matcher.match(value)) + + # require is False and value is not None + return value is None or matcher.match(value) + + +def authority_is_valid(authority, host=None, require=False): + """Determine if the authority string is valid. + + :param str authority: + The authority to validate. + :param str host: + (optional) The host portion of the authority to validate. + :param bool require: + (optional) Specify if authority must not be None. + :returns: + ``True`` if valid, ``False`` otherwise + :rtype: + bool + """ + validated = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require) + if validated and host is not None and misc.IPv4_MATCHER.match(host): + return valid_ipv4_host_address(host) + return validated + + +def scheme_is_valid(scheme, require=False): + """Determine if the scheme is valid. + + :param str scheme: + The scheme string to validate. + :param bool require: + (optional) Set to ``True`` to require the presence of a scheme. + :returns: + ``True`` if the scheme is valid. ``False`` otherwise. 
+ :rtype: + bool + """ + return is_valid(scheme, misc.SCHEME_MATCHER, require) + + +def path_is_valid(path, require=False): + """Determine if the path component is valid. + + :param str path: + The path string to validate. + :param bool require: + (optional) Set to ``True`` to require the presence of a path. + :returns: + ``True`` if the path is valid. ``False`` otherwise. + :rtype: + bool + """ + return is_valid(path, misc.PATH_MATCHER, require) + + +def query_is_valid(query, require=False): + """Determine if the query component is valid. + + :param str query: + The query string to validate. + :param bool require: + (optional) Set to ``True`` to require the presence of a query. + :returns: + ``True`` if the query is valid. ``False`` otherwise. + :rtype: + bool + """ + return is_valid(query, misc.QUERY_MATCHER, require) + + +def fragment_is_valid(fragment, require=False): + """Determine if the fragment component is valid. + + :param str fragment: + The fragment string to validate. + :param bool require: + (optional) Set to ``True`` to require the presence of a fragment. + :returns: + ``True`` if the fragment is valid. ``False`` otherwise. + :rtype: + bool + """ + return is_valid(fragment, misc.FRAGMENT_MATCHER, require) + + +def valid_ipv4_host_address(host): + """Determine if the given host is a valid IPv4 address.""" + # If the host exists, and it might be IPv4, check each byte in the + # address. 
+ return all([0 <= int(byte, base=10) <= 255 for byte in host.split('.')]) From 259207214f472c841b48e08892a1656d0a6d335d Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 7 Mar 2017 07:16:00 -0600 Subject: [PATCH 04/34] Start implementing validation via Validator --- src/rfc3986/exceptions.py | 51 +++++++++++ src/rfc3986/normalizers.py | 7 +- src/rfc3986/uri.py | 11 +++ src/rfc3986/validators.py | 177 +++++++++++++++++++++++++++++++++++++ tests/test_validators.py | 85 ++++++++++++++++++ 5 files changed, 330 insertions(+), 1 deletion(-) create mode 100644 tests/test_validators.py diff --git a/src/rfc3986/exceptions.py b/src/rfc3986/exceptions.py index f0f84d9..8813405 100644 --- a/src/rfc3986/exceptions.py +++ b/src/rfc3986/exceptions.py @@ -33,3 +33,54 @@ class ResolutionError(RFC3986Exception): """Initialize the error with the failed URI.""" super(ResolutionError, self).__init__( "{0} is not an absolute URI.".format(uri.unsplit())) + + +class ValidationError(RFC3986Exception): + """Exception raised during Validation of a URI.""" + + pass + + +class MissingComponentError(ValidationError): + """Exception raised when a required component is missing.""" + + def __init__(self, uri, *component_names): + """Initialize the error with the missing component name.""" + verb = 'was' + if len(component_names) > 1: + verb = 'were' + + components = ', '.join(sorted(component_names)) + super(MissingComponentError, self).__init__( + "{} {} required but missing".format(components, verb), + uri, + component_names, + ) + + +class UnpermittedComponentError(ValidationError): + """Exception raised when a component has an unpermitted value.""" + + def __init__(self, component_name, component_value, allowed_values): + """Initialize the error with the unpermitted component.""" + super(UnpermittedComponentError, self).__init__( + "{} was required to be one of {!r} but was '{!r}'".format( + component_name, list(sorted(allowed_values)), component_value, + ), + component_name, + 
component_value, + allowed_values, + ) + + +class PasswordForbidden(ValidationError): + """Exception raised when a URL has a password in the userinfo section.""" + + def __init__(self, uri): + """Initialize the error with the URI that failed validation.""" + unsplit = getattr(uri, 'unsplit', lambda: uri) + super(PasswordForbidden, self).__init__( + '"{}" contained a password when validation forbade it'.format( + unsplit() + ) + ) diff --git a/src/rfc3986/normalizers.py b/src/rfc3986/normalizers.py index e497c58..82cbffb 100644 --- a/src/rfc3986/normalizers.py +++ b/src/rfc3986/normalizers.py @@ -31,12 +31,17 @@ def normalize_authority(authority): if userinfo: result += normalize_percent_characters(userinfo) + '@' if host: - result += host.lower() + result += normalize_host(host) if port: result += ':' + port return result +def normalize_host(host): + """Normalize a host string.""" + return host.lower() + + def normalize_path(path): """Normalize the path string.""" if not path: diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index 98dc437..232d5cf 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -444,3 +444,14 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): uri = self._replace(**attributes) uri.encoding = self.encoding return uri + + def validate(self, validator): + """Validate the URI using the configured validator. + + :param validator: + Instantiated and configured Validator. + :type validator: + rfc3986.validators.Validator + :raises ValidatorErrors: + In the event of one or more errors in validation. + """ diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index e18681a..6ce5ecc 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -13,7 +13,184 @@ # See the License for the specific language governing permissions and # limitations under the License. """Module containing the validation logic for rfc3986.""" +from . import exceptions from . import misc +from . 
import normalizers + + +class Validator(object): + """Object used to configure validation of all objects in rfc3986. + + Example usage: + + .. code-block:: python + + >>> uri = rfc3986.uri_reference('https://github.com/') + >>> validator = rfc3986.Validator().require_components( + ... 'scheme', 'host', 'path', + ... ).allow_schemes( + ... 'http', 'https', + ... ).allow_hosts( + ... '127.0.0.1', 'github.com', + ... ) + ... + >>> validator.validate(uri) + >>> invalid_uri = rfc3986.uri_reference('imap://mail.google.com') + >>> validator.validate(invalid_uri) + Traceback (most recent call last): + ... + ValidationErrors("Invalid scheme", "Missing path") + + """ + + COMPONENT_NAMES = frozenset([ + 'scheme', + 'userinfo', + 'host', + 'port', + 'path', + 'query', + 'fragment', + ]) + + def __init__(self): + """Initialize our default validations.""" + self.allowed_schemes = set() + self.allowed_hosts = set() + self.allowed_ports = set() + self.allow_password = True + self.require_presence_of = { + 'scheme': False, + 'userinfo': False, + 'host': False, + 'port': False, + 'path': False, + 'query': False, + 'fragment': False, + } + + def allow_schemes(self, *schemes): + """Require the scheme to be one of the provided schemes. + + :param schemes: + Schemes, without ``://`` that are allowed. + :returns: + The validator instance. + :rtype: + Validator + """ + for scheme in schemes: + self.allowed_schemes.add(normalizers.normalize_scheme(scheme)) + return self + + def allow_hosts(self, *hosts): + """Require the host to be one of the provided hosts. + + :param hosts: + Hosts that are allowed. + :returns: + The validator instance. + :rtype: + Validator + """ + for host in hosts: + self.allowed_hosts.add(normalizers.normalize_host(host)) + return self + + def allow_ports(self, *ports): + """Require the port to be one of the provided ports. + + :param ports: + Ports that are allowed. + :returns: + The validator instance. 
+ :rtype: + Validator + """ + for port in ports: + port_int = int(port, base=10) + if 0 <= port_int <= 65535: + self.allowed_ports.add(port_int) + return self + + def allow_use_of_password(self): + """Allow passwords to be present in the URI.""" + self.allow_password = True + return self + + def forbid_use_of_password(self): + """Prevent passwords from being included in the URI.""" + self.allow_password = False + return self + + def require_components(self, *components): + """Require the components provided. + + :param components: + Names of components from :attr:`Validator.COMPONENT_NAMES`. + :returns: + The validator instance. + :rtype: + Validator + """ + components = [c.lower() for c in components] + for component in components: + if component not in self.COMPONENT_NAMES: + raise ValueError( + '"{}" is not a valid component'.format(component) + ) + self.require_presence_of({ + component: True for component in components + }) + return self + + def validate(self, uri): + """Check a URI for conditions specified on this validator. + + :param uri: + Parsed URI to validate. + :type uri: + rfc3986.uri.URIReference + :raises MissingComponentError: + When a required component is missing. + :raises UnpermittedComponentError: + When a component is not one of those allowed. + :raises PasswordForbidden: + When a password is present in the userinfo component but is + not permitted by configuration. 
+ """ + if not self.allow_password: + check_password(uri) + + required_components = [ + component + for component, required in self.require_presence_of.items() + if required + ] + if required_components: + ensure_required_components_exist(uri, required_components) + + +def check_password(uri): + """Assert that there is no password present in the uri.""" + userinfo = uri.userinfo + if not userinfo: + return + credentials = userinfo.split(':', 1) + if len(credentials) <= 1: + return + raise exceptions.PasswordForbidden(uri) + + +def ensure_required_components_exist(uri, required_components): + """Assert that all required components are present in the URI.""" + missing_components = sorted([ + component + for component in required_components + if getattr(uri, component) is None + ]) + if missing_components: + raise exceptions.MissingComponentError(uri) def is_valid(value, matcher, require): diff --git a/tests/test_validators.py b/tests/test_validators.py new file mode 100644 index 0000000..d482c6c --- /dev/null +++ b/tests/test_validators.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +"""Tests for the validators module.""" +import rfc3986 +from rfc3986 import exceptions +from rfc3986 import validators + +import pytest + + +def test_defaults(): + """Verify the default Validator settings.""" + validator = validators.Validator() + + assert validator.require_presence_of == { + c: False for c in validator.COMPONENT_NAMES + } + assert validator.allow_password is True + assert validator.allowed_schemes == set() + assert validator.allowed_hosts == set() + assert validator.allowed_ports == set() + + +def test_allowing_schemes(): + """Verify the ability to select schemes to be allowed.""" + validator = validators.Validator().allow_schemes('http', 'https') + + assert 'http' in validator.allowed_schemes + assert 'https' in validator.allowed_schemes + + +def test_allowing_hosts(): + """Verify the ability to select hosts to be allowed.""" + validator = 
validators.Validator().allow_hosts( + 'pypi.python.org', 'pypi.org', + ) + + assert 'pypi.python.org' in validator.allowed_hosts + assert 'pypi.org' in validator.allowed_hosts + + +def test_allowing_ports(): + """Verify the ability select ports to be allowed.""" + validator = validators.Validator().allow_ports('80', '100') + + assert 80 in validator.allowed_ports + assert 100 in validator.allowed_ports + + +def test_use_of_password(): + """Verify the behaviour of {forbid,allow}_use_of_password.""" + validator = validators.Validator() + assert validator.allow_password is True + + validator.forbid_use_of_password() + assert validator.allow_password is False + + validator.allow_use_of_password() + assert validator.allow_password is True + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('https://user:password@github.com'), + rfc3986.uri_reference('https://user:password@github.com/path'), + rfc3986.uri_reference('https://user:password@github.com/path?query'), + rfc3986.uri_reference('https://user:password@github.com/path?query#frag'), + rfc3986.uri_reference('//user:password@github.com'), +]) +def test_forbidden_passwords(uri): + """Verify that passwords are disallowed.""" + validator = validators.Validator().forbid_use_of_password() + with pytest.raises(exceptions.PasswordForbidden): + validator.validate(uri) + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('https://user@github.com'), + rfc3986.uri_reference('https://user@github.com/path'), + rfc3986.uri_reference('https://user@github.com/path?query'), + rfc3986.uri_reference('https://user@github.com/path?query#frag'), + rfc3986.uri_reference('//user@github.com'), +]) +def test_passwordless_uris_pass_validation(uri): + """Verify password-less URLs validate properly.""" + validator = validators.Validator().forbid_use_of_password() + validator.validate(uri) From 46ecb8468303a124e458d8a4b25d6d6bccdfb3f5 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Wed, 8 Mar 2017 19:11:18 -0600 Subject: 
[PATCH 05/34] Add more tests around existing validation --- src/rfc3986/validators.py | 4 ++-- tests/test_validators.py | 49 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 6ce5ecc..4377595 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -139,7 +139,7 @@ class Validator(object): raise ValueError( '"{}" is not a valid component'.format(component) ) - self.require_presence_of({ + self.require_presence_of.update({ component: True for component in components }) return self @@ -190,7 +190,7 @@ def ensure_required_components_exist(uri, required_components): if getattr(uri, component) is None ]) if missing_components: - raise exceptions.MissingComponentError(uri) + raise exceptions.MissingComponentError(uri, *missing_components) def is_valid(value, matcher, require): diff --git a/tests/test_validators.py b/tests/test_validators.py index d482c6c..45bfad5 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -46,6 +46,12 @@ def test_allowing_ports(): assert 100 in validator.allowed_ports +def test_requiring_invalid_component(): + """Verify that we validate required component names.""" + with pytest.raises(ValueError): + validators.Validator().require_components('frob') + + def test_use_of_password(): """Verify the behaviour of {forbid,allow}_use_of_password.""" validator = validators.Validator() @@ -78,8 +84,51 @@ def test_forbidden_passwords(uri): rfc3986.uri_reference('https://user@github.com/path?query'), rfc3986.uri_reference('https://user@github.com/path?query#frag'), rfc3986.uri_reference('//user@github.com'), + rfc3986.uri_reference('//github.com'), + rfc3986.uri_reference('https://github.com'), ]) def test_passwordless_uris_pass_validation(uri): """Verify password-less URLs validate properly.""" validator = validators.Validator().forbid_use_of_password() validator.validate(uri) + + 
+@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('https://'), + rfc3986.uri_reference('/path/to/resource'), +]) +def test_missing_host_component(uri): + """Verify that missing host components cause errors.""" + validator = validators.Validator().require_components('host') + with pytest.raises(exceptions.MissingComponentError): + validator.validate(uri) + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('https://'), + rfc3986.uri_reference('//google.com'), + rfc3986.uri_reference('//google.com?query=value'), + rfc3986.uri_reference('//google.com#fragment'), + rfc3986.uri_reference('https://google.com'), + rfc3986.uri_reference('https://google.com#fragment'), + rfc3986.uri_reference('https://google.com?query=value'), +]) +def test_missing_path_component(uri): + """Verify that missing path components cause errors.""" + validator = validators.Validator().require_components('path') + with pytest.raises(exceptions.MissingComponentError): + validator.validate(uri) + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('//google.com'), + rfc3986.uri_reference('//google.com?query=value'), + rfc3986.uri_reference('//google.com#fragment'), +]) +def test_multiple_missing_components(uri): + """Verify that multiple missing components are caught.""" + validator = validators.Validator().require_components('scheme', 'path') + with pytest.raises(exceptions.MissingComponentError) as captured_exc: + validator.validate(uri) + exception = captured_exc.value + assert 2 == len(exception.args[-1]) From 74966158aa6df8be713fc9098e9ba0679d155afd Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Thu, 9 Mar 2017 06:56:27 -0600 Subject: [PATCH 06/34] Add more tests around validation --- src/rfc3986/validators.py | 15 ++++++++++++++- tests/test_validators.py | 15 +++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 4377595..48841a8 100644 --- a/src/rfc3986/validators.py +++ 
b/src/rfc3986/validators.py @@ -110,7 +110,7 @@ class Validator(object): for port in ports: port_int = int(port, base=10) if 0 <= port_int <= 65535: - self.allowed_ports.add(port_int) + self.allowed_ports.add(port) return self def allow_use_of_password(self): @@ -170,6 +170,10 @@ class Validator(object): if required_components: ensure_required_components_exist(uri, required_components) + ensure_one_of(self.allowed_schemes, uri, 'scheme') + ensure_one_of(self.allowed_hosts, uri, 'host') + ensure_one_of(self.allowed_ports, uri, 'port') + def check_password(uri): """Assert that there is no password present in the uri.""" @@ -182,6 +186,15 @@ def check_password(uri): raise exceptions.PasswordForbidden(uri) +def ensure_one_of(allowed_values, uri, attribute): + """Assert that the uri's attribute is one of the allowed values.""" + value = getattr(uri, attribute) + if allowed_values and value not in allowed_values: + raise exceptions.UnpermittedComponentError( + attribute, value, allowed_values, + ) + + def ensure_required_components_exist(uri, required_components): """Assert that all required components are present in the URI.""" missing_components = sorted([ diff --git a/tests/test_validators.py b/tests/test_validators.py index 45bfad5..d7c8b17 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -42,8 +42,8 @@ def test_allowing_ports(): """Verify the ability select ports to be allowed.""" validator = validators.Validator().allow_ports('80', '100') - assert 80 in validator.allowed_ports - assert 100 in validator.allowed_ports + assert '80' in validator.allowed_ports + assert '100' in validator.allowed_ports def test_requiring_invalid_component(): @@ -132,3 +132,14 @@ def test_multiple_missing_components(uri): validator.validate(uri) exception = captured_exc.value assert 2 == len(exception.args[-1]) + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('smtp://'), + rfc3986.uri_reference('telnet://'), +]) +def test_ensure_uri_has_a_scheme(uri): 
+ """Verify validation with allowed schemes.""" + validator = validators.Validator().allow_schemes('https', 'http') + with pytest.raises(exceptions.UnpermittedComponentError): + validator.validate(uri) From e6695d1052d2549a8aae410586681eea25f52c8e Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Thu, 9 Mar 2017 07:43:21 -0600 Subject: [PATCH 07/34] Fix up .travis.yml for new support matrix --- .travis.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7f51e25..510237d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,8 +7,6 @@ script: tox matrix: include: - - python: 2.6 - env: TOXENV=py26 - python: 2.7 env: TOXENV=py27 - python: 3.3 @@ -19,10 +17,8 @@ matrix: env: TOXENV=py35 - python: pypy env: TOXENV=pypy - - python: 2.7 - env: TOXENV=py27-flake8 - - python: 3.4 - env: TOXENV=py34-flake8 + - python: 3.5 + env: TOXENV=flake8 #- env: TOXENV=docs notifications: From 0035c8e53cd9b9609adadcd2ffc998728817d6a1 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 11 Mar 2017 08:26:08 -0600 Subject: [PATCH 08/34] Rename require_components to require_presence_of This (in my opinion) is a far more explicit API method as reading it aloud sounds like English, e.g., Validator().require_presence_of('host', 'port', 'path') Or "Validator require(s) presence of host, port, and path". This also adds a bunch more tests around validation and tacks some of the validation error information onto the exception object for easier introspection by users. 
--- src/rfc3986/exceptions.py | 12 ++++++-- src/rfc3986/validators.py | 10 +++---- tests/test_validators.py | 61 +++++++++++++++++++++++++++++++++++---- 3 files changed, 70 insertions(+), 13 deletions(-) diff --git a/src/rfc3986/exceptions.py b/src/rfc3986/exceptions.py index 8813405..49e8d08 100644 --- a/src/rfc3986/exceptions.py +++ b/src/rfc3986/exceptions.py @@ -50,11 +50,13 @@ class MissingComponentError(ValidationError): if len(component_names) > 1: verb = 'were' - components = ', '.join(sorted(component_names)) + self.uri = uri + self.components = sorted(component_names) + components = ', '.join(self.components) super(MissingComponentError, self).__init__( "{} {} required but missing".format(components, verb), uri, - component_names, + self.components, ) @@ -64,13 +66,16 @@ class UnpermittedComponentError(ValidationError): def __init__(self, component_name, component_value, allowed_values): """Initialize the error with the unpermitted component.""" super(UnpermittedComponentError, self).__init__( - "{} was required to be one of {!r} but was '{!r}'".format( + "{} was required to be one of {!r} but was {!r}".format( component_name, list(sorted(allowed_values)), component_value, ), component_name, component_value, allowed_values, ) + self.component_name = component_name + self.component_value = component_value + self.allowed_values = allowed_values class PasswordForbidden(ValidationError): @@ -84,3 +89,4 @@ class PasswordForbidden(ValidationError): unsplit() ) ) + self.uri = uri diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 48841a8..5103fb5 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -59,7 +59,7 @@ class Validator(object): self.allowed_hosts = set() self.allowed_ports = set() self.allow_password = True - self.require_presence_of = { + self.required_components = { 'scheme': False, 'userinfo': False, 'host': False, @@ -123,7 +123,7 @@ class Validator(object): self.allow_password = False return self - def 
require_components(self, *components): + def require_presence_of(self, *components): """Require the components provided. :param components: @@ -139,7 +139,7 @@ class Validator(object): raise ValueError( '"{}" is not a valid component'.format(component) ) - self.require_presence_of.update({ + self.required_components.update({ component: True for component in components }) return self @@ -164,7 +164,7 @@ class Validator(object): required_components = [ component - for component, required in self.require_presence_of.items() + for component, required in self.required_components.items() if required ] if required_components: @@ -189,7 +189,7 @@ def check_password(uri): def ensure_one_of(allowed_values, uri, attribute): """Assert that the uri's attribute is one of the allowed values.""" value = getattr(uri, attribute) - if allowed_values and value not in allowed_values: + if value is not None and allowed_values and value not in allowed_values: raise exceptions.UnpermittedComponentError( attribute, value, allowed_values, ) diff --git a/tests/test_validators.py b/tests/test_validators.py index d7c8b17..8aef1a8 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -11,7 +11,7 @@ def test_defaults(): """Verify the default Validator settings.""" validator = validators.Validator() - assert validator.require_presence_of == { + assert validator.required_components == { c: False for c in validator.COMPONENT_NAMES } assert validator.allow_password is True @@ -49,7 +49,7 @@ def test_allowing_ports(): def test_requiring_invalid_component(): """Verify that we validate required component names.""" with pytest.raises(ValueError): - validators.Validator().require_components('frob') + validators.Validator().require_presence_of('frob') def test_use_of_password(): @@ -99,7 +99,7 @@ def test_passwordless_uris_pass_validation(uri): ]) def test_missing_host_component(uri): """Verify that missing host components cause errors.""" - validator = 
validators.Validator().require_components('host') + validator = validators.Validator().require_presence_of('host') with pytest.raises(exceptions.MissingComponentError): validator.validate(uri) @@ -115,7 +115,7 @@ def test_missing_host_component(uri): ]) def test_missing_path_component(uri): """Verify that missing path components cause errors.""" - validator = validators.Validator().require_components('path') + validator = validators.Validator().require_presence_of('path') with pytest.raises(exceptions.MissingComponentError): validator.validate(uri) @@ -127,7 +127,7 @@ def test_missing_path_component(uri): ]) def test_multiple_missing_components(uri): """Verify that multiple missing components are caught.""" - validator = validators.Validator().require_components('scheme', 'path') + validator = validators.Validator().require_presence_of('scheme', 'path') with pytest.raises(exceptions.MissingComponentError) as captured_exc: validator.validate(uri) exception = captured_exc.value @@ -143,3 +143,54 @@ def test_ensure_uri_has_a_scheme(uri): validator = validators.Validator().allow_schemes('https', 'http') with pytest.raises(exceptions.UnpermittedComponentError): validator.validate(uri) + + +@pytest.mark.parametrize('uri, failed_component', [ + (rfc3986.uri_reference('git://github.com'), 'scheme'), + (rfc3986.uri_reference('http://github.com'), 'scheme'), + (rfc3986.uri_reference('ssh://gitlab.com'), 'host'), + (rfc3986.uri_reference('https://gitlab.com'), 'host'), +]) +def test_allowed_hosts_and_schemes(uri, failed_component): + """Verify each of these fails.""" + validator = validators.Validator().allow_schemes( + 'https', 'ssh', + ).allow_hosts( + 'github.com', 'git.openstack.org', + ) + with pytest.raises(exceptions.UnpermittedComponentError) as caught_exc: + validator.validate(uri) + + exc = caught_exc.value + assert exc.component_name == failed_component + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('https://github.com/sigmavirus24'), + 
rfc3986.uri_reference('ssh://github.com/sigmavirus24'), + rfc3986.uri_reference('ssh://ssh@github.com:22/sigmavirus24'), + rfc3986.uri_reference('https://github.com:443/sigmavirus24'), + rfc3986.uri_reference('https://gitlab.com/sigmavirus24'), + rfc3986.uri_reference('ssh://gitlab.com/sigmavirus24'), + rfc3986.uri_reference('ssh://ssh@gitlab.com:22/sigmavirus24'), + rfc3986.uri_reference('https://gitlab.com:443/sigmavirus24'), + rfc3986.uri_reference('https://bitbucket.org/sigmavirus24'), + rfc3986.uri_reference('ssh://bitbucket.org/sigmavirus24'), + rfc3986.uri_reference('ssh://ssh@bitbucket.org:22/sigmavirus24'), + rfc3986.uri_reference('https://bitbucket.org:443/sigmavirus24'), + rfc3986.uri_reference('https://git.openstack.org/sigmavirus24'), + rfc3986.uri_reference('ssh://git.openstack.org/sigmavirus24'), + rfc3986.uri_reference('ssh://ssh@git.openstack.org:22/sigmavirus24'), + rfc3986.uri_reference('https://git.openstack.org:443/sigmavirus24'), +]) +def test_successful_complex_validation(uri): + """Verify we do not raise ValidationErrors for good URIs.""" + validators.Validator().allow_schemes( + 'https', 'ssh', + ).allow_hosts( + 'github.com', 'bitbucket.org', 'gitlab.com', 'git.openstack.org', + ).allow_ports( + '22', '443', + ).require_presence_of( + 'scheme', 'host', 'path', + ).validate(uri) From 5aab5ae8b4acf074b75b8f40824041589fb64c5b Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 11 Mar 2017 20:00:16 -0600 Subject: [PATCH 09/34] Start adding a URIBuilder object --- src/rfc3986/builder.py | 116 +++++++++++++++++++++++++++++++++++++ src/rfc3986/compat.py | 16 ++++- src/rfc3986/normalizers.py | 10 ++++ tests/test_builder.py | 67 +++++++++++++++++++++ 4 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 src/rfc3986/builder.py create mode 100644 tests/test_builder.py diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py new file mode 100644 index 0000000..85469ed --- /dev/null +++ b/src/rfc3986/builder.py @@ -0,0 +1,116 @@ 
+# -*- coding: utf-8 -*- +# Copyright (c) 2017 Ian Cordasco +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing the logic for the URIBuilder object.""" +from . import normalizers + + +class URIBuilder(object): + """Object to aid in building up a URI Reference from parts. + + .. note:: + + This object should be instantiated by the user, but it's recommended + that it is not provided with arguments. Instead, use the available + method to populate the fields. + + """ + + def __init__(self, scheme=None, userinfo=None, host=None, port=None, + path=None, query=None, fragment=None): + """Initialize our URI builder. + + :param str scheme: + (optional) + :param str userinfo: + (optional) + :param str host: + (optional) + :param int port: + (optional) + :param str path: + (optional) + :param str query: + (optional) + :param str fragment: + (optional) + """ + self.scheme = scheme + self.userinfo = userinfo + self.host = host + self.port = port + self.path = path + self.query = query + self.fragment = fragment + + def __repr__(self): + """Provide a convenient view of our builder object.""" + formatstr = ('URIBuilder(scheme={b.scheme}, userinfo={b.userinfo}, ' + 'host={b.host}, port={b.port}, path={b.path}, ' + 'query={b.query}, fragment={b.fragment})') + return formatstr.format(b=self) + + def add_scheme(self, scheme): + """Add a scheme to our builder object. 
+ + After normalizing, this will generate a new URIBuilder instance with + the specified scheme and all other attributes the same. + + .. code-block:: python + + >>> URIBuilder().add_scheme('HTTPS') + URIBuilder(scheme='https', userinfo=None, host=None, port=None, + path=None, query=None, fragment=None) + + """ + scheme = normalizers.normalize_scheme(scheme) + return URIBuilder( + scheme=scheme, + userinfo=self.userinfo, + host=self.host, + port=self.port, + path=self.path, + query=self.query, + fragment=self.fragment, + ) + + def add_credentials(self, username, password): + """Add credentials as the userinfo portion of the URI. + + .. code-block:: python + + >>> URIBuilder().add_credentials('root', 's3crete') + URIBuilder(scheme=None, userinfo='root:s3crete', host=None, + port=None, path=None, query=None, fragment=None) + + >>> URIBuilder().add_credentials('root', None) + URIBuilder(scheme=None, userinfo='root', host=None, + port=None, path=None, query=None, fragment=None) + """ + if username is None: + raise ValueError('Username cannot be None') + userinfo = normalizers.normalize_username(username) + + if password is not None: + userinfo += ':{}'.format(normalizers.normalize_password(password)) + + return URIBuilder( + scheme=self.scheme, + userinfo=userinfo, + host=self.host, + port=self.port, + path=self.path, + query=self.query, + fragment=self.fragment, + ) diff --git a/src/rfc3986/compat.py b/src/rfc3986/compat.py index 9888e23..97be9f8 100644 --- a/src/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -15,8 +15,22 @@ """Compatibility module for Python 2 and 3 support.""" import sys +try: + from urllib.parse import quote as urlquote +except ImportError: # Python 2.x + from urllib import quote as urlquote -if sys.version_info >= (3, 0): +__all__ = ( + 'to_bytes', + 'to_str', + 'urlquote', +) + +PY3 = (3, 0) <= sys.version_info < (4, 0) +PY2 = (2, 6) <= sys.version_info < (2, 8) + + +if PY3: unicode = str # Python 3.x diff --git a/src/rfc3986/normalizers.py 
b/src/rfc3986/normalizers.py index 82cbffb..8e85886 100644 --- a/src/rfc3986/normalizers.py +++ b/src/rfc3986/normalizers.py @@ -37,6 +37,16 @@ def normalize_authority(authority): return result +def normalize_username(username): + """Normalize a username to make it safe to include in userinfo.""" + return compat.urlquote(username) + + +def normalize_password(password): + """Normalize a password to make safe for userinfo.""" + return compat.urlquote(password) + + def normalize_host(host): """Normalize a host string.""" return host.lower() diff --git a/tests/test_builder.py b/tests/test_builder.py new file mode 100644 index 0000000..a2e8043 --- /dev/null +++ b/tests/test_builder.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2017 Ian Cordasco +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Module containing the tests for the URIBuilder object.""" +import pytest + +from rfc3986 import builder + + +def test_builder_default(): + """Verify the default values.""" + uribuilder = builder.URIBuilder() + assert uribuilder.scheme is None + assert uribuilder.userinfo is None + assert uribuilder.host is None + assert uribuilder.port is None + assert uribuilder.path is None + assert uribuilder.query is None + assert uribuilder.fragment is None + + +def test_repr(): + """Verify our repr looks like our class.""" + uribuilder = builder.URIBuilder() + assert repr(uribuilder).startswith('URIBuilder(scheme=None') + + +@pytest.mark.parametrize('scheme', [ + 'https', + 'hTTps', + 'Https', + 'HtTpS', + 'HTTPS', +]) +def test_add_scheme(scheme): + """Verify schemes are normalized when added.""" + uribuilder = builder.URIBuilder().add_scheme(scheme) + assert uribuilder.scheme == 'https' + + +@pytest.mark.parametrize('username, password, userinfo', [ + ('user', 'pass', 'user:pass'), + ('user', None, 'user'), + ('user@domain.com', 'password', 'user%40domain.com:password'), + ('user', 'pass:word', 'user:pass%3Aword'), +]) +def test_add_credentials(username, password, userinfo): + """Verify we normalize usernames and passwords.""" + uribuilder = builder.URIBuilder().add_credentials(username, password) + assert uribuilder.userinfo == userinfo + + +def test_add_credentials_requires_username(): + """Verify one needs a username to add credentials.""" + with pytest.raises(ValueError): + builder.URIBuilder().add_credentials(None, None) From 3936f8a7fd856334ea14724f1c10d1a8fb684355 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sun, 12 Mar 2017 19:49:55 -0500 Subject: [PATCH 10/34] Add add_host implementation and testing to URIBuilder --- src/rfc3986/builder.py | 20 ++++++++++++++++++++ tests/test_builder.py | 12 ++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index 85469ed..4cb6129 100644 --- a/src/rfc3986/builder.py 
+++ b/src/rfc3986/builder.py @@ -114,3 +114,23 @@ class URIBuilder(object): query=self.query, fragment=self.fragment, ) + + def add_host(self, host): + """Add hostname to the URI. + + .. code-block:: python + + >>> URIBuilder().add_host('google.com') + URIBuilder(scheme=None, userinfo=None, host='google.com', + port=None, path=None, query=None, fragment=None) + + """ + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=normalizers.normalize_host(host), + port=self.port, + path=self.path, + query=self.query, + fragment=self.fragment, + ) diff --git a/tests/test_builder.py b/tests/test_builder.py index a2e8043..b10ffc2 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -65,3 +65,15 @@ def test_add_credentials_requires_username(): """Verify one needs a username to add credentials.""" with pytest.raises(ValueError): builder.URIBuilder().add_credentials(None, None) + + +@pytest.mark.parametrize('hostname', [ + 'google.com', + 'GOOGLE.COM', + 'gOOgLe.COM', + 'goOgLE.com', +]) +def test_add_host(hostname): + """Verify we normalize hostnames in add_host.""" + uribuilder = builder.URIBuilder().add_host(hostname) + assert uribuilder.host == 'google.com' From 3f8cb0c4cc532e0d6cf01b16ac4784ceb0206a74 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sun, 12 Mar 2017 20:20:08 -0500 Subject: [PATCH 11/34] Add path and query handling to URIBuilder --- src/rfc3986/builder.py | 113 +++++++++++++++++++++++++++++++++++++++++ src/rfc3986/compat.py | 5 ++ tests/test_builder.py | 69 +++++++++++++++++++++++++ 3 files changed, 187 insertions(+) diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index 4cb6129..5cd654e 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Module containing the logic for the URIBuilder object.""" +from . import compat from . 
import normalizers @@ -134,3 +135,115 @@ class URIBuilder(object): query=self.query, fragment=self.fragment, ) + + def add_port(self, port): + """Add port to the URI. + + .. code-block:: python + + >>> URIBuilder().add_port(80) + URIBuilder(scheme=None, userinfo=None, host=None, port='80', + path=None, query=None, fragment=None) + + >>> URIBuilder().add_port(443) + URIBuilder(scheme=None, userinfo=None, host=None, port='443', + path=None, query=None, fragment=None) + + """ + port_int = int(port) + if port_int < 0: + raise ValueError( + 'ports are not allowed to be negative. You provided {}'.format( + port_int, + ) + ) + if port_int > 65535: + raise ValueError( + 'ports are not allowed to be larger than 65535. ' + 'You provided {}'.format( + port_int, + ) + ) + + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=self.host, + port='{}'.format(port_int), + path=self.path, + query=self.query, + fragment=self.fragment, + ) + + def add_path(self, path): + """Add a path to the URI. + + .. code-block:: python + + >>> URIBuilder().add_path('sigmavirus24/rfc3986') + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path='/sigmavirus24/rfc3986', query=None, fragment=None) + + >>> URIBuilder().add_path('/checkout.php') + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path='/checkout.php', query=None, fragment=None) + + """ + if not path.startswith('/'): + path = '/{}'.format(path) + + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=self.host, + port=self.port, + path=path, + query=self.query, + fragment=self.fragment, + ) + + def add_query_from(self, query_items): + """Generate and add a query from a dictionary or list of tuples. + + ..
code-block:: python + + >>> URIBuilder().add_query_from({'a': 'b c'}) + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path=None, query='a=b+c', fragment=None) + + >>> URIBuilder().add_query_from([('a', 'b c')]) + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path=None, query='a=b+c', fragment=None) + + """ + query = compat.urlencode(query_items) + + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=self.host, + port=self.port, + path=self.path, + query=query, + fragment=self.fragment, + ) + + def add_query(self, query): + """Add a pre-formatted query string to the URI. + + .. code-block:: python + + >>> URIBuilder().add_query('a=b&c=d') + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path=None, query='a=b&c=d', fragment=None) + + """ + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=self.host, + port=self.port, + path=self.path, + query=query, + fragment=self.fragment, + ) diff --git a/src/rfc3986/compat.py b/src/rfc3986/compat.py index 97be9f8..c053f71 100644 --- a/src/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -20,6 +20,11 @@ try: except ImportError: # Python 2.x from urllib import quote as urlquote +try: + from urllib.parse import urlencode +except ImportError: # Python 2.x + from urllib import urlencode + __all__ = ( 'to_bytes', 'to_str', diff --git a/tests/test_builder.py b/tests/test_builder.py index b10ffc2..903634e 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -77,3 +77,72 @@ def test_add_host(hostname): """Verify we normalize hostnames in add_host.""" uribuilder = builder.URIBuilder().add_host(hostname) assert uribuilder.host == 'google.com' + + +@pytest.mark.parametrize('port', [ + -100, + '-100', + -1, + '-1', + 65536, + '65536', + 1000000, + '1000000', + '', + 'abc', + '0b10', +]) +def test_add_invalid_port(port): + """Verify we raise a ValueError for invalid ports.""" + with pytest.raises(ValueError): +
builder.URIBuilder().add_port(port) + + +@pytest.mark.parametrize('port, expected', [ + (0, '0'), + ('0', '0'), + (1, '1'), + ('1', '1'), + (22, '22'), + ('22', '22'), + (80, '80'), + ('80', '80'), + (443, '443'), + ('443', '443'), + (65535, '65535'), + ('65535', '65535'), +]) +def test_add_port(port, expected): + """Verify we normalize our port.""" + uribuilder = builder.URIBuilder().add_port(port) + assert uribuilder.port == expected + + +@pytest.mark.parametrize('path', [ + 'sigmavirus24/rfc3986', + '/sigmavirus24/rfc3986', +]) +def test_add_path(path): + """Verify we normalize our path value.""" + uribuilder = builder.URIBuilder().add_path(path) + assert uribuilder.path == '/sigmavirus24/rfc3986' + + +@pytest.mark.parametrize('query_items, expected', [ + ({'a': 'b c'}, 'a=b+c'), + ({'a': 'b+c'}, 'a=b%2Bc'), + ([('a', 'b c')], 'a=b+c'), + ([('a', 'b+c')], 'a=b%2Bc'), + ([('a', 'b'), ('c', 'd')], 'a=b&c=d'), + ([('a', 'b'), ('username', '@d')], 'a=b&username=%40d'), +]) +def test_add_query_from(query_items, expected): + """Verify the behaviour of add_query_from.""" + uribuilder = builder.URIBuilder().add_query_from(query_items) + assert uribuilder.query == expected + + +def test_add_query(): + """Verify we do not modify the provided query string.""" + uribuilder = builder.URIBuilder().add_query('username=@foo') + assert uribuilder.query == 'username=@foo' From 673617a0c5c436ad84484137a8b3ac7945f1466a Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 14 Mar 2017 06:29:20 -0500 Subject: [PATCH 12/34] Add fragment handling and tests --- src/rfc3986/builder.py | 26 +++++++++++++++++++++++--- src/rfc3986/compat.py | 1 + tests/test_builder.py | 6 ++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index 5cd654e..b459e2b 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -197,7 +197,7 @@ class URIBuilder(object): userinfo=self.userinfo, host=self.host, port=self.port, - path=path, + 
path=normalizers.normalize_path(path), query=self.query, fragment=self.fragment, ) @@ -216,7 +216,7 @@ class URIBuilder(object): path=None, query='a=b+c', fragment=None) """ - query = compat.urlencode(query_items) + query = normalizers.normalize_query(compat.urlencode(query_items)) return URIBuilder( scheme=self.scheme, @@ -244,6 +244,26 @@ class URIBuilder(object): host=self.host, port=self.port, path=self.path, - query=query, + query=normalizers.normalize_query(query), fragment=self.fragment, ) + + def add_fragment(self, fragment): + """Add a fragment to the URI. + + .. code-block:: python + + >>> URIBuilder().add_fragment('section-2.6.1') + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path=None, query=None, fragment='section-2.6.1') + + """ + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=self.host, + port=self.port, + path=self.path, + query=self.query, + fragment=normalizers.normalize_fragment(fragment), + ) diff --git a/src/rfc3986/compat.py b/src/rfc3986/compat.py index c053f71..8da7770 100644 --- a/src/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -29,6 +29,7 @@ __all__ = ( 'to_bytes', 'to_str', 'urlquote', + 'urlencode', ) PY3 = (3, 0) <= sys.version_info < (4, 0) diff --git a/tests/test_builder.py b/tests/test_builder.py index 903634e..e49386d 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -146,3 +146,9 @@ def test_add_query(): """Verify we do not modify the provided query string.""" uribuilder = builder.URIBuilder().add_query('username=@foo') assert uribuilder.query == 'username=@foo' + + +def test_add_fragment(): + """Verify our handling of fragments.""" + uribuilder = builder.URIBuilder().add_fragment('section-2.5.1') + assert uribuilder.fragment == 'section-2.5.1' From cdd60e3c871770f9246aaeaeba5b6a5c57375a4a Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 14 Mar 2017 06:48:36 -0500 Subject: [PATCH 13/34] Add the ability to finalize a URI This will return a URIReference to the 
user and allow them to unsplit the object. --- src/rfc3986/builder.py | 26 ++++++++++++++++++++++++++ tests/test_builder.py | 11 +++++++++++ 2 files changed, 37 insertions(+) diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index b459e2b..8a6de13 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -15,6 +15,7 @@ """Module containing the logic for the URIBuilder object.""" from . import compat from . import normalizers +from . import uri class URIBuilder(object): @@ -267,3 +268,28 @@ class URIBuilder(object): query=self.query, fragment=normalizers.normalize_fragment(fragment), ) + + def finalize(self): + """Create a URIReference from our builder. + + .. code-block:: python + + >>> URIBuilder().add_scheme('https').add_host('github.com' + ... ).add_path('sigmavirus24/rfc3986').finalize().unsplit() + 'https://github.com/sigmavirus24/rfc3986' + + >>> URIBuilder().add_scheme('https').add_host('github.com' + ... ).add_path('sigmavirus24/rfc3986').add_credentials( + ... 
'sigmavirus24', 'not-re@l').finalize().unsplit() + 'https://sigmavirus24:not-re%40l@github.com/sigmavirus24/rfc3986' + + """ + return uri.URIReference( + self.scheme, + normalizers.normalize_authority( + (self.userinfo, self.host, self.port) + ), + self.path, + self.query, + self.fragment, + ) diff --git a/tests/test_builder.py b/tests/test_builder.py index e49386d..251f353 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -152,3 +152,14 @@ def test_add_fragment(): """Verify our handling of fragments.""" uribuilder = builder.URIBuilder().add_fragment('section-2.5.1') assert uribuilder.fragment == 'section-2.5.1' + + +def test_finalize(): + """Verify the whole thing.""" + uri = builder.URIBuilder().add_scheme('https').add_credentials( + 'sigmavirus24', 'not-my-re@l-password' + ).add_host('github.com').add_path('sigmavirus24/rfc3986').finalize( + ).unsplit() + expected = ('https://sigmavirus24:not-my-re%40l-password@github.com/' + 'sigmavirus24/rfc3986') + assert expected == uri From 10529e2699d6039be4b9b889d05c452f72d6f159 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 14 Mar 2017 07:07:19 -0500 Subject: [PATCH 14/34] Begin documenting the module --- docs/source/conf.py | 162 ++++++++++++++++++++++++++++++++++++++++++ docs/source/index.rst | 20 ++++++ tox.ini | 2 +- 3 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..cd716e5 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +# +# rfc3986 documentation build configuration file, created by +# sphinx-quickstart on Tue Mar 14 07:06:46 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. 
+# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.coverage'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'rfc3986' +copyright = u'2017, Ian Cordasco' +author = u'Ian Cordasco' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = u'1.0.0' +# The full version, including alpha/beta/rc tags. +release = u'1.0.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. 
+language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'rfc3986doc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). 
+latex_documents = [ + (master_doc, 'rfc3986.tex', u'rfc3986 Documentation', + u'Ian Cordasco', 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'rfc3986', u'rfc3986 Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'rfc3986', u'rfc3986 Documentation', + author, 'rfc3986', 'One line description of project.', + 'Miscellaneous'), +] + + + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'https://docs.python.org/': None} diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..6ef36e6 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,20 @@ +.. rfc3986 documentation master file, created by + sphinx-quickstart on Tue Mar 14 07:06:46 2017. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to rfc3986's documentation! +=================================== + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/tox.ini b/tox.ini index e579c75..f126768 100644 --- a/tox.ini +++ b/tox.ini @@ -38,7 +38,7 @@ commands = deps = sphinx>=1.3.0 commands = - sphinx-build -E -c docs -b html docs/ docs/_build/html + sphinx-build -E -c docs -b html docs/source/ docs/build/html [testenv:readme] deps = From 90e74f6a032224f71b6e715d6d8512173b486f0e Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Mon, 20 Mar 2017 06:54:45 -0500 Subject: [PATCH 15/34] Start seriously working on documentation --- README.rst | 4 ++-- docs/source/api-ref/uri.rst | 19 ++++++++++++++++++ docs/source/conf.py | 12 ++++++++---- docs/source/index.rst | 37 +++++++++++++++++++++--------------- docs/source/narrative.rst | 27 ++++++++++++++++++++++++++ docs/source/user/parsing.rst | 9 +++++++++ src/rfc3986/uri.py | 11 ----------- tox.ini | 3 ++- 8 files changed, 89 insertions(+), 33 deletions(-) create mode 100644 docs/source/api-ref/uri.rst create mode 100644 docs/source/narrative.rst create mode 100644 docs/source/user/parsing.rst diff --git a/README.rst b/README.rst index 62e4974..0d3dcb6 100644 --- a/README.rst +++ b/README.rst @@ -2,12 +2,12 @@ rfc3986 ======= A Python implementation of `RFC 3986`_ including validation and authority -parsing. Coming soon: `Reference Resolution `_. +parsing. Installation ------------ -Simply use pip to install ``rfc3986`` like so:: +Use pip to install ``rfc3986`` like so:: pip install rfc3986 diff --git a/docs/source/api-ref/uri.rst b/docs/source/api-ref/uri.rst new file mode 100644 index 0000000..bedc9f4 --- /dev/null +++ b/docs/source/api-ref/uri.rst @@ -0,0 +1,19 @@ +=============== + URI Submodule +=============== + +.. autoclass:: rfc3986.uri.URIReference + +.. automethod:: rfc3986.uri.URIReference.from_string + +.. automethod:: rfc3986.uri.URIReference.unsplit + +.. 
automethod:: rfc3986.uri.URIReference.resolve_with + +.. automethod:: rfc3986.uri.URIReference.copy_with + +.. automethod:: rfc3986.uri.URIReference.normalize + +.. automethod:: rfc3986.uri.URIReference.is_absolute + +.. automethod:: rfc3986.uri.URIReference.authority_info diff --git a/docs/source/conf.py b/docs/source/conf.py index cd716e5..8900180 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,14 +26,20 @@ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' +rst_epilog = """ +.. |rfc3986| replace:: :mod:`rfc3986` +""" # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ['sphinx.ext.autodoc', +extensions = [ + 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', - 'sphinx.ext.coverage'] + 'sphinx.ext.coverage', + 'sphinx-prompt', +] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -156,7 +162,5 @@ texinfo_documents = [ ] - - # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'https://docs.python.org/': None} diff --git a/docs/source/index.rst b/docs/source/index.rst index 6ef36e6..f69505b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,20 +1,27 @@ -.. rfc3986 documentation master file, created by - sphinx-quickstart on Tue Mar 14 07:06:46 2017. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +========= + rfc3986 +========= -Welcome to rfc3986's documentation! -=================================== +|rfc3986| is a Python implementation of :rfc:`3986` including validation and +authority parsing. + +The maintainers strongly suggest using `pip`_ to install |rfc3986|. For +example, + +.. 
prompt:: bash + + pip install rfc3986 + python -m pip install rfc3986 + python3.6 -m pip install rfc3986 .. toctree:: - :maxdepth: 2 - :caption: Contents: + :maxdepth: 2 + :caption: Contents: + + narrative + api-ref/uri - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` +.. links +.. _pip: + https://pypi.python.org/pypi/pip/ diff --git a/docs/source/narrative.rst b/docs/source/narrative.rst new file mode 100644 index 0000000..94b41ef --- /dev/null +++ b/docs/source/narrative.rst @@ -0,0 +1,27 @@ +==================== + User Documentation +==================== + +|rfc3986| has several API features and convenience methods. The core of +|rfc3986|'s API revolves around parsing, validating, and building URIs. + +There is an API to provide compatibility with :mod:`urllib.parse`, there is an +API to parse a URI as a URI Reference, there's an API to provide validation of +URIs, and finally there's an API to build URIs. + +.. note:: + + There's presently no support for IRIs as defined in :rfc:`3987`. + +|rfc3986| parses URIs much differently from :mod:`urllib.parse` so users may +see some subtle differences with very specific URLs that contain rough +edge cases. Regardless, we do our best to implement the same API so you should +be able to seamlessly swap |rfc3986| for ``urlparse``. + + +.. toctree:: + :maxdepth: 2 + + user/parsing + user/validating + user/building diff --git a/docs/source/user/parsing.rst b/docs/source/user/parsing.rst new file mode 100644 index 0000000..c5785d4 --- /dev/null +++ b/docs/source/user/parsing.rst @@ -0,0 +1,9 @@ +=============== + Parsing a URI +=============== + +There are two ways to parse a URI with |rfc3986| + +#. :meth:`rfc3986.uri.URIReference.from_string` + +#.
diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index 232d5cf..98dc437 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -444,14 +444,3 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): uri = self._replace(**attributes) uri.encoding = self.encoding return uri - - def validate(self, validator): - """Validate the URI using the configured validator. - - :param validator: - Instantiated and configured Validator. - :type validator: - rfc3986.validators.Validator - :raises ValidatorErrors: - In the event of one or more errors in validation. - """ diff --git a/tox.ini b/tox.ini index f126768..06fa02a 100644 --- a/tox.ini +++ b/tox.ini @@ -37,8 +37,9 @@ commands = [testenv:docs] deps = sphinx>=1.3.0 + sphinx-prompt commands = - sphinx-build -E -c docs -b html docs/source/ docs/build/html + sphinx-build -E -c docs/source/ -b html docs/source/ docs/build/html [testenv:readme] deps = From fca3714e784c12cd9e26b204948f7158230f54de Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 25 Mar 2017 06:49:08 -0500 Subject: [PATCH 16/34] Improve the regular expression for schemes --- src/rfc3986/misc.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index 83c98b1..2c02d9e 100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -47,12 +47,10 @@ NON_PCT_ENCODED = RESERVED_CHARS.union(UNRESERVED_CHARS).union('%') # Extracted from http://tools.ietf.org/html/rfc3986#appendix-B component_pattern_dict = { - # NOTE(sigmavirus24): Our list of things we want to not match includes one - # item more than the RFC. We want to not parse the leading '[' from an - # IPv6 address into the scheme when provided something akin to: - # >>> rfc3986.uri_reference('[::1]') - # We would rather that appear to be a path than not. - 'scheme': '[^:/?#[]+', + # NOTE(sigmavirus24): We're going to use more strict regular expressions + # than appear in Appendix B for scheme. 
This will prevent over-eager + # consuming of items that aren't schemes. + 'scheme': '[a-zA-Z][a-zA-Z0-9+.-]*', 'authority': '[^/?#]*', 'path': '[^?#]*', 'query': '[^#]*', From 4aa02a7243181fd0938bfa36a9d5982588b7cdfe Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 25 Mar 2017 06:49:41 -0500 Subject: [PATCH 17/34] Add basic API Reference documentation --- docs/source/api-ref/api.rst | 9 +++++++++ docs/source/api-ref/index.rst | 12 ++++++++++++ docs/source/index.rst | 2 +- docs/source/narrative.rst | 2 ++ 4 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 docs/source/api-ref/api.rst create mode 100644 docs/source/api-ref/index.rst diff --git a/docs/source/api-ref/api.rst b/docs/source/api-ref/api.rst new file mode 100644 index 0000000..646dc24 --- /dev/null +++ b/docs/source/api-ref/api.rst @@ -0,0 +1,9 @@ +=============== + API Submodule +=============== + +.. autofunction:: rfc3986.api.urlparse + +.. autofunction:: rfc3986.api.uri_reference + +.. autofunction:: rfc3986.api.normalize_uri diff --git a/docs/source/api-ref/index.rst b/docs/source/api-ref/index.rst new file mode 100644 index 0000000..8e30874 --- /dev/null +++ b/docs/source/api-ref/index.rst @@ -0,0 +1,12 @@ +=============== + API Reference +=============== + +This section contains API documentation generated from the source code of +|rfc3986|. If you're looking for an introduction to the module and how it +can be utilized, please see :ref:`narrative` instead. + +.. toctree:: + :maxdepth: 1 + + uri diff --git a/docs/source/index.rst b/docs/source/index.rst index f69505b..ce24f07 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -19,7 +19,7 @@ example, :caption: Contents: narrative - api-ref/uri + api-ref/index .. links diff --git a/docs/source/narrative.rst b/docs/source/narrative.rst index 94b41ef..5bf55f3 100644 --- a/docs/source/narrative.rst +++ b/docs/source/narrative.rst @@ -1,3 +1,5 @@ +.. 
_narrative: + ==================== User Documentation ==================== From 56f8bebc4bdde94c9cd0c5ae01a36c493fbac580 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 25 Mar 2017 06:50:12 -0500 Subject: [PATCH 18/34] Add narrative documentation around parsing --- docs/source/user/parsing.rst | 105 ++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/docs/source/user/parsing.rst b/docs/source/user/parsing.rst index c5785d4..c78943a 100644 --- a/docs/source/user/parsing.rst +++ b/docs/source/user/parsing.rst @@ -4,6 +4,107 @@ There are two ways to parse a URI with |rfc3986| -#. :meth:`rfc3986.uri.URIReference.from_string` +#. :meth:`rfc3986.api.uri_reference` -#. + This is best when you're **not** replacing existing usage of + :mod:`urllib.parse`. This also provides convenience methods around safely + normalizing URIs passed into it. + +#. :meth:`rfc3986.api.urlparse` + + This is best suited to completely replace :func:`urllib.parse.urlparse`. + It returns a class that should be indistinguishable from + :class:`urllib.parse.ParseResult` + +Let's look at some code samples. + + +Some Examples +============= + +First we'll parse the URL that points to the repository for this project. + +.. code-block:: python + + url = rfc3986.urlparse('https://github.com/sigmavirus24/rfc3986') + + +Then we'll replace parts of that URL with new values: + +.. code-block:: python + + print(url.copy_with( + userinfo='username:password', + port='443', + ).unsplit()) + # https://username:password@github.com/sigmavirus24/rfc3986 + +This, however, does not change the current ``url`` instance of +:class:`~rfc3986.parseresult.ParseResult`. As the method name might suggest, +we're copying that instance and then overriding certain attributes. +In fact, we can make as many copies as we like and nothing will change. + +.. 
code-block:: python + + print(url.copy_with( + scheme='ssh', + userinfo='git', + ).unsplit()) + # ssh://git@github.com/sigmavirus24/rfc3986 + print(url.scheme) + # https + +We can do similar things with URI References as well. + +.. code-block:: python + + uri = rfc3986.uri_reference('https://github.com/sigmavirus24/rfc3986') + print(uri.copy_with( + authority='username:password@github.com:443', + path='sigmavirus24/github3.py', + ).unsplit()) + # https://username:password@github.com/sigmavirus24/github3.py + +However, URI References may have some unexpected behaviour based strictly on +the RFC. + + +And Now For Something Slightly Unusual +====================================== + +If you are familiar with GitHub, GitLab, or a similar service, you may have +interacted with the "SSH URL" for some projects. For this project, +the SSH URL is: + +.. code:: + + git@github.com:sigmavirus24/rfc3986 + + +Let's see what happens when we parse this. + +.. code-block:: pycon + + >>> rfc3986.uri_reference('git@github.com:sigmavirus24/rfc3986') + URIReference(scheme=None, authority=None, + path=u'git@github.com:sigmavirus24/rfc3986', query=None, fragment=None) + +There's no scheme present, but it is apparent to our (human) eyes that +``git@github.com`` should not be part of the path. This is one of the areas +where :mod:`rfc3986` suffers slightly due to its strict conformance to +:rfc:`3986`. In the RFC, an authority must be preceded by ``//``. Let's see +what happens when we add that to our URI + +.. code-block:: pycon + + >>> rfc3986.uri_reference('//git@github.com:sigmavirus24/rfc3986') + URIReference(scheme=None, authority=u'git@github.com:sigmavirus24', + path=u'/rfc3986', query=None, fragment=None) + +Somewhat better, but not much. + +.. note:: + + The maintainers of :mod:`rfc3986` are working to discern better ways to + parse these less common URIs in a reasonable and sensible way without + losing conformance to the RFC. 
From 259950d85e0d5abdeb6da20091423f77a2c124ca Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sun, 26 Mar 2017 14:28:30 -0500 Subject: [PATCH 19/34] Remove whitespace --- docs/source/narrative.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/narrative.rst b/docs/source/narrative.rst index 5bf55f3..1676c41 100644 --- a/docs/source/narrative.rst +++ b/docs/source/narrative.rst @@ -7,8 +7,8 @@ |rfc3986| has several API features and convenience methods. The core of |rfc3986|'s API revolves around parsing, validating, and building URIs. -There is an API to provide compatibility with :mod:`urllib.parse`, there is an -API to parse a URI as a URI Reference, there's an API to provide validation of +There is an API to provide compatibility with :mod:`urllib.parse`, there is an +API to parse a URI as a URI Reference, there's an API to provide validation of URIs, and finally there's an API to build URIs. .. note:: From e882c5608eea4d68b96bb1d11b1295fe7585eda0 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sun, 26 Mar 2017 14:42:25 -0500 Subject: [PATCH 20/34] Fix Validator example code --- src/rfc3986/validators.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 5103fb5..8e7eb0b 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -21,25 +21,25 @@ from . import normalizers class Validator(object): """Object used to configure validation of all objects in rfc3986. - Example usage: + Example usage:: - .. code-block:: python - - >>> uri = rfc3986.uri_reference('https://github.com/') - >>> validator = rfc3986.Validator().require_components( - ... 'scheme', 'host', 'path', - ... ).allow_schemes( - ... 'http', 'https', - ... ).allow_hosts( - ... '127.0.0.1', 'github.com', - ... ) - ... 
- >>> validator.validate(uri) - >>> invalid_uri = rfc3986.uri_reference('imap://mail.google.com') - >>> validator.validate(invalid_uri) - Traceback (most recent call last): - ... - ValidationErrors("Invalid scheme", "Missing path") + >>> from rfc3986 import api, validators + >>> uri = api.uri_reference('https://github.com/') + >>> validator = validators.Validator().require_presence_of( + ... 'scheme', 'host', 'path', + ... ).allow_schemes( + ... 'http', 'https', + ... ).allow_hosts( + ... '127.0.0.1', 'github.com', + ... ) + >>> validator.validate(uri) + >>> invalid_uri = rfc3986.uri_reference('imap://mail.google.com') + >>> validator.validate(invalid_uri) + Traceback (most recent call last): + ... + rfc3986.exceptions.MissingComponentError: ('path was required but + missing', URIReference(scheme=u'imap', authority=u'mail.google.com', + path=None, query=None, fragment=None), ['path']) """ From d33f40afa519c3e0d13ec5106d10a46d8d7860cc Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 28 Mar 2017 06:37:43 -0500 Subject: [PATCH 21/34] Convert to using doctest directives We use and enable sphinx.ext.doctest so let's actually run our example code and ensure they work. --- docs/source/user/parsing.rst | 38 +++++++++++++++++++++++++++++------- tox.ini | 3 ++- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/docs/source/user/parsing.rst b/docs/source/user/parsing.rst index c78943a..1682ed2 100644 --- a/docs/source/user/parsing.rst +++ b/docs/source/user/parsing.rst @@ -24,6 +24,12 @@ Some Examples First we'll parse the URL that points to the repository for this project. +.. testsetup:: * + + import rfc3986 + url = rfc3986.urlparse('https://github.com/sigmavirus24/rfc3986') + uri = rfc3986.uri_reference('https://github.com/sigmavirus24/rfc3986') + .. code-block:: python url = rfc3986.urlparse('https://github.com/sigmavirus24/rfc3986') @@ -31,39 +37,57 @@ First we'll parse the URL that points to the repository for this project. 
Then we'll replace parts of that URL with new values: -.. code-block:: python +.. testcode:: ex0 print(url.copy_with( userinfo='username:password', port='443', ).unsplit()) - # https://username:password@github.com/sigmavirus24/rfc3986 + +.. testoutput:: ex0 + + https://username:password@github.com:443/sigmavirus24/rfc3986 This, however, does not change the current ``url`` instance of :class:`~rfc3986.parseresult.ParseResult`. As the method name might suggest, we're copying that instance and then overriding certain attributes. In fact, we can make as many copies as we like and nothing will change. -.. code-block:: python +.. testcode:: ex1 print(url.copy_with( scheme='ssh', userinfo='git', ).unsplit()) - # ssh://git@github.com/sigmavirus24/rfc3986 + +.. testoutput:: ex1 + + ssh://git@github.com/sigmavirus24/rfc3986 + +.. testcode:: ex1 + print(url.scheme) - # https + +.. testoutput:: ex1 + + https We can do similar things with URI References as well. .. code-block:: python uri = rfc3986.uri_reference('https://github.com/sigmavirus24/rfc3986') + +.. testcode:: ex2 + print(uri.copy_with( authority='username:password@github.com:443', - path='sigmavirus24/github3.py', + path='/sigmavirus24/github3.py', ).unsplit()) - # https://username:password@github.com/sigmavirus24/github3.py + +.. testoutput:: ex2 + + https://username:password@github.com:443/sigmavirus24/github3.py However, URI References may have some unexpected behaviour based strictly on the RFC. 
diff --git a/tox.ini b/tox.ini index 06fa02a..597c76d 100644 --- a/tox.ini +++ b/tox.ini @@ -39,7 +39,8 @@ deps = sphinx>=1.3.0 sphinx-prompt commands = - sphinx-build -E -c docs/source/ -b html docs/source/ docs/build/html + sphinx-build -WE -c docs/source/ -b html docs/source/ docs/build/html + sphinx-build -WE -c docs/source/ -b doctest docs/source/ docs/build/html [testenv:readme] deps = From deb563755321871316728beabfa5a77ca3ca3573 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 28 Mar 2017 06:40:21 -0500 Subject: [PATCH 22/34] Provide an easy way to toy around with the library --- tox.ini | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tox.ini b/tox.ini index 597c76d..6752851 100644 --- a/tox.ini +++ b/tox.ini @@ -20,6 +20,9 @@ deps = flake8-import-order commands = flake8 {posargs} src/rfc3986 +[testenv:venv] +commands = {posargs} + [testenv:build] deps = wheel From fb7ac98cc36b01b01af08a548d18f28cab18e7a7 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 28 Mar 2017 06:41:14 -0500 Subject: [PATCH 23/34] Add documentation for the URIBuilder object --- docs/source/api-ref/builder.rst | 23 +++++++ docs/source/api-ref/index.rst | 2 + docs/source/user/building.rst | 116 ++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+) create mode 100644 docs/source/api-ref/builder.rst create mode 100644 docs/source/user/building.rst diff --git a/docs/source/api-ref/builder.rst b/docs/source/api-ref/builder.rst new file mode 100644 index 0000000..a77ec53 --- /dev/null +++ b/docs/source/api-ref/builder.rst @@ -0,0 +1,23 @@ +==================== + URI Builder Module +==================== + +.. autoclass:: rfc3986.builder.URIBuilder + +.. automethod:: rfc3986.builder.URIBuilder.add_scheme + +.. automethod:: rfc3986.builder.URIBuilder.add_credentials + +.. automethod:: rfc3986.builder.URIBuilder.add_host + +.. automethod:: rfc3986.builder.URIBuilder.add_port + +.. automethod:: rfc3986.builder.URIBuilder.add_path + +.. 
automethod:: rfc3986.builder.URIBuilder.add_query_from + +.. automethod:: rfc3986.builder.URIBuilder.add_query + +.. automethod:: rfc3986.builder.URIBuilder.add_fragment + +.. automethod:: rfc3986.builder.URIBuilder.finalize diff --git a/docs/source/api-ref/index.rst b/docs/source/api-ref/index.rst index 8e30874..07ccce4 100644 --- a/docs/source/api-ref/index.rst +++ b/docs/source/api-ref/index.rst @@ -9,4 +9,6 @@ can be utilized, please see :ref:`narrative` instead. .. toctree:: :maxdepth: 1 + api + builder uri diff --git a/docs/source/user/building.rst b/docs/source/user/building.rst new file mode 100644 index 0000000..8fe6a82 --- /dev/null +++ b/docs/source/user/building.rst @@ -0,0 +1,116 @@ +=============== + Building URIs +=============== + +Constructing URLs often seems simple. There are some problems with +concatenating strings to build a URL: + +- Certain parts of the URL disallow certain characters + +- Formatting some parts of the URL is tricky and doing it manually isn't fun + +To make the experience better |rfc3986| provides the +:class:`~rfc3986.builder.URIBuilder` class to generate valid +:class:`~rfc3986.uri.URIReference` instances. The +:class:`~rfc3986.builder.URIBuilder` class will handle ensuring that each +component is normalized and safe for real world use. + + +Example Usage +============= + +.. note:: + + All of the methods on a :class:`~rfc3986.builder.URIBuilder` are + chainable (except :meth:`~rfc3986.builder.URIBuilder.finalize`). + +Let's build a basic URL with just a scheme and host. First we create an +instance of :class:`~rfc3986.builder.URIBuilder`. Then we call +:meth:`~rfc3986.builder.URIBuilder.add_scheme` and +:meth:`~rfc3986.builder.URIBuilder.add_host` with the scheme and host +we want to include in the URL. Then we convert our builder object into +a :class:`~rfc3986.uri.URIReference` and call +:meth:`~rfc3986.uri.URIReference.unsplit`. + +.. 
doctest:: + + >>> from rfc3986 import builder + >>> print(builder.URIBuilder().add_scheme( + ... 'https' + ... ).add_host( + ... 'github.com' + ... ).finalize().unsplit()) + https://github.com + +Each time you invoke a method, you get a new instance of a +:class:`~rfc3986.builder.URIBuilder` class so you can build several different +URLs from one base instance. + +.. doctest:: + + >>> from rfc3986 import builder + >>> github_builder = builder.URIBuilder().add_scheme( + ... 'https' + ... ).add_host( + ... 'api.github.com' + ... ) + >>> print(github_builder.add_path( + ... '/users/sigmavirus24' + ... ).finalize().unsplit()) + https://api.github.com/users/sigmavirus24 + >>> print(github_builder.add_path( + ... '/repos/sigmavirus24/rfc3986' + ... ).finalize().unsplit()) + https://api.github.com/repos/sigmavirus24/rfc3986 + +|rfc3986| makes adding authentication credentials convenient. It takes care of +making the credentials URL safe. There are some characters someone might want +to include in a URL that are not safe for the authority component of a URL. + +.. doctest:: + + >>> from rfc3986 import builder + >>> print(builder.URIBuilder().add_scheme( + ... 'https' + ... ).add_host( + ... 'api.github.com' + ... ).add_credentials( + ... username='us3r', + ... password='p@ssw0rd', + ... ).finalize().unsplit()) + https://us3r:p%40ssw0rd@api.github.com + +Further, |rfc3986| attempts to simplify the process of adding query parameters +to a URL. For example, if we were using Elasticsearch, we might do something +like: + +.. doctest:: + + >>> from rfc3986 import builder + >>> print(builder.URIBuilder().add_scheme( + ... 'https' + ... ).add_host( + ... 'search.example.com' + ... ).add_path( + ... '_search' + ... ).add_query_from( + ... [('q', 'repo:sigmavirus24/rfc3986'), ('sort', 'created_at:asc')] + ... ).finalize().unsplit()) + https://search.example.com/_search?q=repo%3Asigmavirus24%2Frfc3986&sort=created_at%3Aasc + +Finally, we provide a way to add a fragment to a URL. 
Let's build up a URL to +view the section of the RFC that refers to fragments: + +.. doctest:: + + >>> from rfc3986 import builder + >>> print(builder.URIBuilder().add_scheme( + ... 'https' + ... ).add_host( + ... 'tools.ietf.org' + ... ).add_path( + ... '/html/rfc3986' + ... ).add_fragment( + ... 'section-3.5' + ... ).finalize().unsplit()) + https://tools.ietf.org/html/rfc3986#section-3.5 From d99258fc01fde4b870614f1296b8b6718de29825 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 28 Mar 2017 06:47:36 -0500 Subject: [PATCH 24/34] Add stubs for validation documentation --- docs/source/api-ref/index.rst | 1 + docs/source/api-ref/validators.rst | 19 +++++++++++++++++++ docs/source/user/validating.rst | 3 +++ 3 files changed, 23 insertions(+) create mode 100644 docs/source/api-ref/validators.rst create mode 100644 docs/source/user/validating.rst diff --git a/docs/source/api-ref/index.rst b/docs/source/api-ref/index.rst index 07ccce4..6b0733f 100644 --- a/docs/source/api-ref/index.rst +++ b/docs/source/api-ref/index.rst @@ -12,3 +12,4 @@ can be utilized, please see :ref:`narrative` instead. api builder uri + validators diff --git a/docs/source/api-ref/validators.rst b/docs/source/api-ref/validators.rst new file mode 100644 index 0000000..eaf078f --- /dev/null +++ b/docs/source/api-ref/validators.rst @@ -0,0 +1,19 @@ +====================== + Validators Submodule +====================== + +.. autoclass:: rfc3986.validators.Validator + +.. automethod:: rfc3986.validators.Validator.allow_schemes + +.. automethod:: rfc3986.validators.Validator.allow_hosts + +.. automethod:: rfc3986.validators.Validator.allow_ports + +.. automethod:: rfc3986.validators.Validator.allow_use_of_password + +.. automethod:: rfc3986.validators.Validator.forbid_use_of_password + +.. automethod:: rfc3986.validators.Validator.require_presence_of + +.. 
automethod:: rfc3986.validators.Validator.validate diff --git a/docs/source/user/validating.rst b/docs/source/user/validating.rst new file mode 100644 index 0000000..81ece28 --- /dev/null +++ b/docs/source/user/validating.rst @@ -0,0 +1,3 @@ +================= + Validating URIs +================= From a848da6f32dce14733751dcebef3eee8d808b938 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 1 Apr 2017 19:14:40 -0500 Subject: [PATCH 25/34] Use a better method of concatenating Using += on a str isn't as efficient as using str.format. --- src/rfc3986/builder.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index 8a6de13..2eb9ab4 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -105,7 +105,10 @@ class URIBuilder(object): userinfo = normalizers.normalize_username(username) if password is not None: - userinfo += ':{}'.format(normalizers.normalize_password(password)) + userinfo = '{}:{}'.format( + userinfo, + normalizers.normalize_password(password), + ) return URIBuilder( scheme=self.scheme, From 82c21ae41f5b5aa4f99903810e4cb019261719f4 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 4 Apr 2017 10:20:15 -0500 Subject: [PATCH 26/34] Add more validator documentation --- docs/source/user/validating.rst | 53 +++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/docs/source/user/validating.rst b/docs/source/user/validating.rst index 81ece28..4908bf8 100644 --- a/docs/source/user/validating.rst +++ b/docs/source/user/validating.rst @@ -1,3 +1,56 @@ ================= Validating URIs ================= + +While not as difficult as `validating an email address`_, validating URIs is +tricky. Different parts of the URI allow different characters. Those sets +sometimes overlap and other times they don't and it's not very convenient. +Luckily, |rfc3986| makes validating URIs far simpler. 
+ +Example Usage +============= + +First we need to create an instance of a +:class:`~rfc3986.validators.Validator` which takes no parameters. After that +we can call methods on the instance to indicate what we want to validate. + +Let's assume that we're building something that takes user input for a URL and +we want to ensure that URL is only ever using a specific domain with https. In +that case, our code would look like this: + +.. doctest:: + + >>> from rfc3986 import validators, uri_reference + >>> user_url = 'https://github.com/sigmavirus24/rfc3986' + >>> validator = validators.Validator().allow_schemes( + ... 'https', + ... ).allow_hosts( + ... 'github.com', + ... ) + >>> validator.validate(uri_reference( + ... 'https://github.com/sigmavirus24/rfc3986' + ... )) + >>> validator.validate(uri_reference( + ... 'https://github.com/' + ... )) + >>> validator.validate(uri_reference( + ... 'http://example.com' + ... )) + Traceback (most recent call last): + ... + rfc3986.exceptions.UnpermittedComponentError + +First notice that we can easily reuse our validator object for each URL. +This allows users to not have to constantly reconstruct Validators for each +bit of user input. Next, we have three different URLs that we validate: + +#. ``https://github.com/sigmavirus24/rfc3986`` +#. ``https://github.com/`` +#. ``http://example.com`` + +As it stands, our validator will allow the first two URLs to pass but will +fail the third. This is specifically because we only allow URLs using +``https`` as a scheme and ``github.com`` as the domain name. + +.. 
_validating an email address: + http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx/ From ac06bd315920fc4922fd6a483730e4047652e0c6 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Fri, 7 Apr 2017 14:43:41 -0500 Subject: [PATCH 27/34] Wrap up documentation of Validator --- docs/source/user/validating.rst | 84 +++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/docs/source/user/validating.rst b/docs/source/user/validating.rst index 4908bf8..26a0444 100644 --- a/docs/source/user/validating.rst +++ b/docs/source/user/validating.rst @@ -52,5 +52,89 @@ As it stands, our validator will allow the first two URLs to pass but will fail the third. This is specifically because we only allow URLs using ``https`` as a scheme and ``github.com`` as the domain name. +Next, let's imagine that we want to prevent leaking user credentials. In that +case, we want to ensure that there is no password in the user information +portion of the authority. In that case, our new validator would look like this: + +.. doctest:: + + >>> from rfc3986 import validators, uri_reference + >>> user_url = 'https://github.com/sigmavirus24/rfc3986' + >>> validator = validators.Validator().allow_schemes( + ... 'https', + ... ).allow_hosts( + ... 'github.com', + ... ).forbid_use_of_password() + >>> validator.validate(uri_reference( + ... 'https://github.com/sigmavirus24/rfc3986' + ... )) + >>> validator.validate(uri_reference( + ... 'https://github.com/' + ... )) + >>> validator.validate(uri_reference( + ... 'http://example.com' + ... )) + Traceback (most recent call last): + ... + rfc3986.exceptions.UnpermittedComponentError + >>> validator.validate(uri_reference( + ... 'https://sigmavirus24@github.com' + ... )) + >>> validator.validate(uri_reference( + ... 'https://sigmavirus24:not-my-real-password@github.com' + ... )) + Traceback (most recent call last): + ... 
+ rfc3986.exceptions.PasswordForbidden + +Up until now, we have assumed that we will get a URL that has the appropriate +components for validation. For example, we assume that we will have a URL that +has a scheme and hostname. However, our current validation doesn't require +those items exist. + +.. doctest:: + + >>> from rfc3986 import validators, uri_reference + >>> user_url = 'https://github.com/sigmavirus24/rfc3986' + >>> validator = validators.Validator().allow_schemes( + ... 'https', + ... ).allow_hosts( + ... 'github.com', + ... ).forbid_use_of_password() + >>> validator.validate(uri_reference('//github.com')) + >>> validator.validate(uri_reference('https:/')) + +In the first case, we have a host name but no scheme and in the second we have +a scheme and a path but no host. If we want to ensure that those components +are there and that they are *always* what we allow, then we must add one last +item to our validator: + +.. doctest:: + + >>> from rfc3986 import validators, uri_reference + >>> user_url = 'https://github.com/sigmavirus24/rfc3986' + >>> validator = validators.Validator().allow_schemes( + ... 'https', + ... ).allow_hosts( + ... 'github.com', + ... ).forbid_use_of_password( + ... ).require_presence_of( + ... 'scheme', 'host', + ... ) + >>> validator.validate(uri_reference('//github.com')) + Traceback (most recent call last): + ... + rfc3986.exceptions.MissingComponentError + >>> validator.validate(uri_reference('https:/')) + Traceback (most recent call last): + ... + rfc3986.exceptions.MissingComponentError + >>> validator.validate(uri_reference('https://github.com')) + >>> validator.validate(uri_reference( + ... 'https://github.com/sigmavirus24/rfc3986' + ... )) + +.. links + .. 
_validating an email address: http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx/ From cd453de7dd92bc0c960c8a1d16d04ec8d146d88d Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 29 Apr 2017 17:29:55 -0500 Subject: [PATCH 28/34] Move regular expressions into separate module --- src/rfc3986/abnf_regexp.py | 177 ++++++++++++++++++++++++++++++++++++ src/rfc3986/misc.py | 178 +++++++------------------------------ 2 files changed, 207 insertions(+), 148 deletions(-) create mode 100644 src/rfc3986/abnf_regexp.py diff --git a/src/rfc3986/abnf_regexp.py b/src/rfc3986/abnf_regexp.py new file mode 100644 index 0000000..183afc8 --- /dev/null +++ b/src/rfc3986/abnf_regexp.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Module for the regular expressions crafted from ABNF.""" + +# https://tools.ietf.org/html/rfc3986#page-13 +GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@" +GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS) +# https://tools.ietf.org/html/rfc3986#page-13 +SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;=" +SUB_DELIMITERS_SET = set(SUB_DELIMITERS) +# Escape the '*' for use in regular expressions +RE_SUB_DELIMITERS = "!$&'()\*+,;=" +RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET) +ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' +DIGIT = '0123456789' +# https://tools.ietf.org/html/rfc3986#section-2.3 +UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + '._!-' +UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS) +NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET).union('%') +# We need to escape the '-' in this case: +RE_UNRESERVED = 'A-Za-z0-9._~\-' + +# NOTE(sigmavirus24): We're going to use more strict regular expressions +# than appear in Appendix B for scheme. This will prevent over-eager +# consuming of items that aren't schemes. +SCHEME_RE = '[a-zA-Z][a-zA-Z0-9+.-]*' +AUTHORITY_RE = '[^/?#]*' +PATH_RE = '[^?#]*' +QUERY_RE = '[^#]*' +FRAGMENT_RE = '.*' + +# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B +COMPONENT_PATTERN_DICT = { + 'scheme': SCHEME_RE, + 'authority': AUTHORITY_RE, + 'path': PATH_RE, + 'query': QUERY_RE, + 'fragment': FRAGMENT_RE, +} + +# See http://tools.ietf.org/html/rfc3986#appendix-B +# In this case, we name each of the important matches so we can use +# SRE_Match#groupdict to parse the values out if we so choose. This is also +# modified to ignore other matches that are not important to the parsing of +# the reference so we can also simply use SRE_Match#groups. +URL_PARSING_RE = ( + '(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?' + '(?P<path>{path})(?:\?(?P<query>{query}))?' + '(?:#(?P<fragment>{fragment}))?' 
+).format(**COMPONENT_PATTERN_DICT) + + +# ######################### +# Authority Matcher Section +# ######################### + +# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2 +# The pattern for a regular name, e.g., www.google.com, api.github.com +REGULAR_NAME_RE = REG_NAME = '(({0})*|[{1}]*)'.format( + '%[0-9A-Fa-f]{2}', RE_SUB_DELIMITERS + RE_UNRESERVED +) +# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1, +IPv4_RE = '([0-9]{1,3}.){3}[0-9]{1,3}' +# Hexadecimal characters used in each piece of an IPv6 address +HEXDIG_RE = '[0-9A-Fa-f]{1,4}' +# Least-significant 32 bits of an IPv6 address +LS32_RE = '({hex}:{hex}|{ipv4})'.format(hex=HEXDIG_RE, ipv4=IPv4_RE) +# Substitutions into the following patterns for IPv6 patterns defined +# http://tools.ietf.org/html/rfc3986#page-20 +_subs = {'hex': HEXDIG_RE, 'ls32': LS32_RE} + +# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details +# about ABNF (Augmented Backus-Naur Form) use in the comments +variations = [ + # 6( h16 ":" ) ls32 + '(%(hex)s:){6}%(ls32)s' % _subs, + # "::" 5( h16 ":" ) ls32 + '::(%(hex)s:){5}%(ls32)s' % _subs, + # [ h16 ] "::" 4( h16 ":" ) ls32 + '(%(hex)s)?::(%(hex)s:){4}%(ls32)s' % _subs, + # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + '((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s' % _subs, + # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + '((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s' % _subs, + # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + '((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s' % _subs, + # [ *4( h16 ":" ) h16 ] "::" ls32 + '((%(hex)s:){0,4}%(hex)s)?::%(ls32)s' % _subs, + # [ *5( h16 ":" ) h16 ] "::" h16 + '((%(hex)s:){0,5}%(hex)s)?::%(hex)s' % _subs, + # [ *6( h16 ":" ) h16 ] "::" + '((%(hex)s:){0,6}%(hex)s)?::' % _subs, +] + +IPv6_RE = '(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7}))'.format( + *variations +) + +IPv_FUTURE_RE = 'v[0-9A-Fa-f]+.[%s]+' % ( + RE_UNRESERVED + RE_SUB_DELIMITERS + ':' +) + +IP_LITERAL_RE = 
'\[({0}|{1})\]'.format(IPv6_RE, IPv_FUTURE_RE) + +# Pattern for matching the host piece of the authority +HOST_RE = HOST_PATTERN = '({0}|{1}|{2})'.format( + REG_NAME, + IPv4_RE, + IP_LITERAL_RE, +) +USERINFO_RE = '^[A-Za-z0-9_.~\-%:]+' +PORT_RE = '[0-9]{1,5}' + +# #################### +# Path Matcher Section +# #################### + +# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information +# about the path patterns defined below. + +# Percent encoded character values +PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}' +PCHAR = '([' + RE_UNRESERVED + RE_SUB_DELIMITERS + ':@]|%s)' % PCT_ENCODED +segments = { + 'segment': PCHAR + '*', + # Non-zero length segment + 'segment-nz': PCHAR + '+', + # Non-zero length segment without ":" + 'segment-nz-nc': PCHAR.replace(':', '') + '+' +} + +# Path types taken from Section 3.3 (linked above) +PATH_EMPTY = '^$' +PATH_ROOTLESS = '%(segment-nz)s(/%(segment)s)*' % segments +PATH_NOSCHEME = '%(segment-nz-nc)s(/%(segment)s)*' % segments +PATH_ABSOLUTE = '/(%s)?' 
% PATH_ROOTLESS +PATH_ABEMPTY = '(/%(segment)s)*' % segments +PATH_RE = '^(%s|%s|%s|%s|%s)$' % ( + PATH_ABEMPTY, PATH_ABSOLUTE, PATH_NOSCHEME, PATH_ROOTLESS, PATH_EMPTY +) + +FRAGMENT_RE = QUERY_RE = ( + '^([/?:@' + RE_UNRESERVED + RE_SUB_DELIMITERS + ']|%s)*$' % PCT_ENCODED +) + +# ########################## +# Relative reference matcher +# ########################## + +# See http://tools.ietf.org/html/rfc3986#section-4.2 for details +RELATIVE_PART_RE = '(//%s%s|%s|%s|%s)' % ( + COMPONENT_PATTERN_DICT['authority'], + PATH_ABEMPTY, + PATH_ABSOLUTE, + PATH_NOSCHEME, + PATH_EMPTY, +) + +# See http://tools.ietf.org/html/rfc3986#section-3 for definition +HIER_PART_RE = '(//%s%s|%s|%s|%s)' % ( + COMPONENT_PATTERN_DICT['authority'], + PATH_ABEMPTY, + PATH_ABSOLUTE, + PATH_ROOTLESS, + PATH_EMPTY, +) diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index 2c02d9e..90d32ed 100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -21,188 +21,70 @@ expressions for parsing and validating URIs and their components. import re +from . 
import abnf_regexp + # These are enumerated for the named tuple used as a superclass of # URIReference URI_COMPONENTS = ['scheme', 'authority', 'path', 'query', 'fragment'] important_characters = { - 'generic_delimiters': ":/?#[]@", - 'sub_delimiters': "!$&'()*+,;=", + 'generic_delimiters': abnf_regexp.GENERIC_DELIMITERS, + 'sub_delimiters': abnf_regexp.SUB_DELIMITERS, # We need to escape the '*' in this case - 're_sub_delimiters': "!$&'()\*+,;=", - 'unreserved_chars': ('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' - '0123456789._~-'), + 're_sub_delimiters': abnf_regexp.RE_SUB_DELIMITERS, + 'unreserved_chars': abnf_regexp.UNRESERVED_CHARS, # We need to escape the '-' in this case: - 're_unreserved': 'A-Za-z0-9._~\-', + 're_unreserved': abnf_regexp.RE_UNRESERVED, } # For details about delimiters and reserved characters, see: # http://tools.ietf.org/html/rfc3986#section-2.2 -GENERIC_DELIMITERS = set(important_characters['generic_delimiters']) -SUB_DELIMITERS = set(important_characters['sub_delimiters']) -RESERVED_CHARS = GENERIC_DELIMITERS.union(SUB_DELIMITERS) +GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET +SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET +RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET # For details about unreserved characters, see: # http://tools.ietf.org/html/rfc3986#section-2.3 -UNRESERVED_CHARS = set(important_characters['unreserved_chars']) -NON_PCT_ENCODED = RESERVED_CHARS.union(UNRESERVED_CHARS).union('%') +UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET +NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET -# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B -component_pattern_dict = { - # NOTE(sigmavirus24): We're going to use more strict regular expressions - # than appear in Appendix B for scheme. This will prevent over-eager - # consuming of items that aren't schemes. 
- 'scheme': '[a-zA-Z][a-zA-Z0-9+.-]*', - 'authority': '[^/?#]*', - 'path': '[^?#]*', - 'query': '[^#]*', - 'fragment': '.*', - } - -# See http://tools.ietf.org/html/rfc3986#appendix-B -# In this case, we name each of the important matches so we can use -# SRE_Match#groupdict to parse the values out if we so choose. This is also -# modified to ignore other matches that are not important to the parsing of -# the reference so we can also simply use SRE_Match#groups. -expression = ('(?:(?P{scheme}):)?(?://(?P{authority}))?' - '(?P{path})(?:\?(?P{query}))?' - '(?:#(?P{fragment}))?' - ).format(**component_pattern_dict) - -URI_MATCHER = re.compile(expression) - -# ######################### -# Authority Matcher Section -# ######################### - -# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2 -# The pattern for a regular name, e.g., www.google.com, api.github.com -reg_name = '(({0})*|[{1}]*)'.format( - '%[0-9A-Fa-f]{2}', - important_characters['re_sub_delimiters'] + - important_characters['re_unreserved'] - ) -# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1, -ipv4 = '([0-9]{1,3}.){3}[0-9]{1,3}' -# Hexadecimal characters used in each piece of an IPv6 address -hexdig = '[0-9A-Fa-f]{1,4}' -# Least-significant 32 bits of an IPv6 address -ls32 = '({hex}:{hex}|{ipv4})'.format(hex=hexdig, ipv4=ipv4) -# Substitutions into the following patterns for IPv6 patterns defined -# http://tools.ietf.org/html/rfc3986#page-20 -subs = {'hex': hexdig, 'ls32': ls32} - -# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details -# about ABNF (Augmented Backus-Naur Form) use in the comments -variations = [ - # 6( h16 ":" ) ls32 - '(%(hex)s:){6}%(ls32)s' % subs, - # "::" 5( h16 ":" ) ls32 - '::(%(hex)s:){5}%(ls32)s' % subs, - # [ h16 ] "::" 4( h16 ":" ) ls32 - '(%(hex)s)?::(%(hex)s:){4}%(ls32)s' % subs, - # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 - '((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s' % subs, - # [ *2( h16 ":" ) h16 ] 
"::" 2( h16 ":" ) ls32 - '((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s' % subs, - # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 - '((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s' % subs, - # [ *4( h16 ":" ) h16 ] "::" ls32 - '((%(hex)s:){0,4}%(hex)s)?::%(ls32)s' % subs, - # [ *5( h16 ":" ) h16 ] "::" h16 - '((%(hex)s:){0,5}%(hex)s)?::%(hex)s' % subs, - # [ *6( h16 ":" ) h16 ] "::" - '((%(hex)s:){0,6}%(hex)s)?::' % subs, - ] - -ipv6 = '(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7}))'.format(*variations) - -ipv_future = 'v[0-9A-Fa-f]+.[%s]+' % ( - important_characters['re_unreserved'] + - important_characters['re_sub_delimiters'] + - ':') - -ip_literal = '\[({0}|{1})\]'.format(ipv6, ipv_future) - -# Pattern for matching the host piece of the authority -HOST_PATTERN = '({0}|{1}|{2})'.format(reg_name, ipv4, ip_literal) +URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE) SUBAUTHORITY_MATCHER = re.compile(( - '^(?:(?P[A-Za-z0-9_.~\-%:]+)@)?' # userinfo - '(?P{0}?)' # host - ':?(?P[0-9]+)?$' # port - ).format(HOST_PATTERN)) - -IPv4_MATCHER = re.compile('^' + ipv4 + '$') + '^(?:(?P{0})@)?' # userinfo + '(?P{1}?)' # host + ':?(?P{2})?$' # port + ).format(abnf_regexp.USERINFO_RE, + abnf_regexp.HOST_PATTERN, + abnf_regexp.PORT_RE)) -# #################### -# Path Matcher Section -# #################### - -# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information -# about the path patterns defined below. 
- -# Percent encoded character values -pct_encoded = '%[A-Fa-f0-9]{2}' -pchar = ('([' + important_characters['re_unreserved'] - + important_characters['re_sub_delimiters'] - + ':@]|%s)' % pct_encoded) -segments = { - 'segment': pchar + '*', - # Non-zero length segment - 'segment-nz': pchar + '+', - # Non-zero length segment without ":" - 'segment-nz-nc': pchar.replace(':', '') + '+' - } - -# Path types taken from Section 3.3 (linked above) -path_empty = '^$' -path_rootless = '%(segment-nz)s(/%(segment)s)*' % segments -path_noscheme = '%(segment-nz-nc)s(/%(segment)s)*' % segments -path_absolute = '/(%s)?' % path_rootless -path_abempty = '(/%(segment)s)*' % segments +IPv4_MATCHER = re.compile('^' + abnf_regexp.IPv4_RE + '$') # Matcher used to validate path components -PATH_MATCHER = re.compile('^(%s|%s|%s|%s|%s)$' % ( - path_abempty, path_absolute, path_noscheme, path_rootless, path_empty - )) +PATH_MATCHER = re.compile(abnf_regexp.PATH_RE) # ################################## # Query and Fragment Matcher Section # ################################## -QUERY_MATCHER = re.compile( - '^([/?:@' + important_characters['re_unreserved'] - + important_characters['re_sub_delimiters'] - + ']|%s)*$' % pct_encoded) +QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE) FRAGMENT_MATCHER = QUERY_MATCHER # Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1 -SCHEME_MATCHER = re.compile('^[A-Za-z][A-Za-z0-9+.\-]*$') - -# Relative reference matcher - -# See http://tools.ietf.org/html/rfc3986#section-4.2 for details -relative_part = '(//%s%s|%s|%s|%s)' % ( - component_pattern_dict['authority'], path_abempty, path_absolute, - path_noscheme, path_empty - ) +SCHEME_MATCHER = re.compile('^{0}$'.format(abnf_regexp.SCHEME_RE)) RELATIVE_REF_MATCHER = re.compile('^%s(\?%s)?(#%s)?$' % ( - relative_part, QUERY_MATCHER.pattern, FRAGMENT_MATCHER.pattern - )) - -# See http://tools.ietf.org/html/rfc3986#section-3 for definition -hier_part = '(//%s%s|%s|%s|%s)' % ( - 
component_pattern_dict['authority'], path_abempty, path_absolute, - path_rootless, path_empty - ) + abnf_regexp.RELATIVE_PART_RE, abnf_regexp.QUERY_RE, + abnf_regexp.FRAGMENT_RE, +)) # See http://tools.ietf.org/html/rfc3986#section-4.3 ABSOLUTE_URI_MATCHER = re.compile('^%s:%s(\?%s)?$' % ( - component_pattern_dict['scheme'], hier_part, QUERY_MATCHER.pattern[1:-1] - )) + abnf_regexp.COMPONENT_PATTERN_DICT['scheme'], + abnf_regexp.HIER_PART_RE, + abnf_regexp.QUERY_RE[1:-1], +)) # Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3 From 6b79edcb72ebdf372860026084e5dfbf1ebed82a Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sun, 30 Apr 2017 08:04:04 -0500 Subject: [PATCH 29/34] Tidy up docstrings for pydocstyle 2.0 --- src/rfc3986/parseresult.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rfc3986/parseresult.py b/src/rfc3986/parseresult.py index dc9d4d5..838a0c3 100644 --- a/src/rfc3986/parseresult.py +++ b/src/rfc3986/parseresult.py @@ -45,22 +45,22 @@ class ParseResultMixin(object): return self.authority def geturl(self): - """Standard library shim to the unsplit method.""" + """Shim to match the standard library method.""" return self.unsplit() @property def hostname(self): - """Standard library shim for the host portion of the URI.""" + """Shim to match the standard library.""" return self.host @property def netloc(self): - """Standard library shim for the authority portion of the URI.""" + """Shim to match the standard library.""" return self.authority @property def params(self): - """Standard library shim for the query portion of the URI.""" + """Shim to match the standard library.""" return self.query @@ -147,7 +147,7 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), @property def authority(self): - """Normalized authority generated from the subauthority parts.""" + """Return the normalized authority.""" return self.reference.authority def copy_with(self, scheme=None, 
userinfo=None, host=None, port=None, @@ -280,7 +280,7 @@ class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS), @property def authority(self): - """Normalized authority generated from the subauthority parts.""" + """Return the normalized authority.""" return self.reference.authority.encode(self.encoding) def copy_with(self, scheme=None, userinfo=None, host=None, port=None, From 18a689de0d02a669069086264fc92e82ca849584 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sat, 6 May 2017 20:18:38 -0500 Subject: [PATCH 30/34] Document misc and abnf_regexp submodules Add UseExisting and begin using it in the API. Also rename some now public attributes in rfc3986.abnf_regexp. Refs #24 --- docs/source/api-ref/index.rst | 1 + docs/source/api-ref/miscellaneous.rst | 231 ++++++++++++++++++++++++++ src/rfc3986/abnf_regexp.py | 28 ++-- src/rfc3986/misc.py | 10 +- src/rfc3986/uri.py | 7 +- 5 files changed, 257 insertions(+), 20 deletions(-) create mode 100644 docs/source/api-ref/miscellaneous.rst diff --git a/docs/source/api-ref/index.rst b/docs/source/api-ref/index.rst index 6b0733f..7c0f58c 100644 --- a/docs/source/api-ref/index.rst +++ b/docs/source/api-ref/index.rst @@ -13,3 +13,4 @@ can be utilized, please see :ref:`narrative` instead. builder uri validators + miscellaneous diff --git a/docs/source/api-ref/miscellaneous.rst b/docs/source/api-ref/miscellaneous.rst new file mode 100644 index 0000000..08ea7ff --- /dev/null +++ b/docs/source/api-ref/miscellaneous.rst @@ -0,0 +1,231 @@ +========================== + Miscellaneous Submodules +========================== + +There are several submodules in |rfc3986| that are not meant to be exposed to +users directly but which are valuable to document, regardless. + +.. data:: rfc3986.misc.UseExisting + + A sentinel object to make certain APIs simpler for users. + +.. module:: rfc3986.abnf_regexp + +The :mod:`rfc3986.abnf_regexp` module contains the regular expressions written +from the RFC's ABNF. 
The :mod:`rfc3986.misc` module contains compiled regular +expressions from :mod:`rfc3986.abnf_regexp` and previously contained those +regular expressions. + +.. data:: rfc3986.abnf_regexp.GEN_DELIMS +.. data:: rfc3986.abnf_regexp.GENERIC_DELIMITERS + + The string containing all of the generic delimiters as defined on + `page 13 <https://tools.ietf.org/html/rfc3986#page-13>`__. + +.. data:: rfc3986.abnf_regexp.GENERIC_DELIMITERS_SET + + :data:`rfc3986.abnf_regexp.GEN_DELIMS` represented as a :class:`set`. + +.. data:: rfc3986.abnf_regexp.SUB_DELIMS +.. data:: rfc3986.abnf_regexp.SUB_DELIMITERS + + The string containing all of the 'sub' delimiters as defined on + `page 13 <https://tools.ietf.org/html/rfc3986#page-13>`__. + +.. data:: rfc3986.abnf_regexp.SUB_DELIMITERS_SET + + :data:`rfc3986.abnf_regexp.SUB_DELIMS` represented as a :class:`set`. + +.. data:: rfc3986.abnf_regexp.SUB_DELIMITERS_RE + + :data:`rfc3986.abnf_regexp.SUB_DELIMS` with the ``*`` escaped for use in + regular expressions. + +.. data:: rfc3986.abnf_regexp.RESERVED_CHARS_SET + + A :class:`set` constructed of :data:`GEN_DELIMS` and :data:`SUB_DELIMS`. + This union is defined on `page 13 + <https://tools.ietf.org/html/rfc3986#page-13>`__. + +.. data:: rfc3986.abnf_regexp.ALPHA + + The string of upper- and lower-case letters in US-ASCII. + +.. data:: rfc3986.abnf_regexp.DIGIT + + The string of digits 0 through 9. + +.. data:: rfc3986.abnf_regexp.UNRESERVED +.. data:: rfc3986.abnf_regexp.UNRESERVED_CHARS + + The string of unreserved characters defined in :rfc:`3986#section-2.3`. + +.. data:: rfc3986.abnf_regexp.UNRESERVED_CHARS_SET + + :data:`rfc3986.abnf_regexp.UNRESERVED_CHARS` represented as a + :class:`set`. + +.. data:: rfc3986.abnf_regexp.NON_PCT_ENCODED_SET + + The non-percent encoded characters represented as a :class:`set`. + +.. data:: rfc3986.abnf_regexp.UNRESERVED_RE + + Optimized regular expression for unreserved characters. + +.. data:: rfc3986.abnf_regexp.SCHEME_RE + + Stricter regular expression to match and validate the scheme part + of a URI. + +.. 
data:: rfc3986.abnf_regexp.COMPONENT_PATTERN_DICT + + Dictionary with regular expressions to match various components in + a URI. Except for :data:`rfc3986.abnf_regexp.SCHEME_RE`, all patterns + are from :rfc:`3986#appendix-B`. + +.. data:: rfc3986.abnf_regexp.URL_PARSING_RE + + Regular expression composed from the components in + :data:`rfc3986.abnf_regexp.COMPONENT_PATTERN_DICT`. + +.. data:: rfc3986.abnf_regexp.HEXDIG_RE + + Hexadecimal characters used in each piece of an IPv6 address. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.LS32_RE + + Least significant 32 bits of an IPv6 address. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.REG_NAME +.. data:: rfc3986.abnf_regexp.REGULAR_NAME_RE + + The pattern for a regular name, e.g., ``www.google.com``, + ``api.github.com``. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.IPv4_RE + + The pattern for an IPv4 address, e.g., ``192.168.255.255``. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.IPv6_RE + + The pattern for an IPv6 address, e.g., ``::1``. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.IPv_FUTURE_RE + + A regular expression to parse out IPv Futures. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.IP_LITERAL_RE + + Pattern to match IPv6 addresses and IPv Future addresses. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.HOST_RE +.. data:: rfc3986.abnf_regexp.HOST_PATTERN + + Pattern to match and validate the host piece of an authority. + This is composed of + + - :data:`rfc3986.abnf_regexp.REG_NAME` + - :data:`rfc3986.abnf_regexp.IPv4_RE` + - :data:`rfc3986.abnf_regexp.IP_LITERAL_RE` + + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.USERINFO_RE + + Pattern to match and validate the user information portion of + an authority component. + + See :rfc:`3986#section-3.2.1`. + +.. 
data:: rfc3986.abnf_regexp.PORT_RE + + Pattern to match and validate the port portion of an authority + component. + + See :rfc:`3986#section-3.2.3`. + +.. data:: rfc3986.abnf_regexp.PCT_ENCODED +.. data:: rfc3986.abnf_regexp.PERCENT_ENCODED + + Regular expression to match percent encoded character values. + +.. data:: rfc3986.abnf_regexp.PCHAR + + Regular expression to match path characters (``pchar``). + +.. data:: rfc3986.abnf_regexp.PATH_RE + + Regular expression to match and validate the path component of a URI. + + See :rfc:`3986#section-3.3`. + +.. data:: rfc3986.abnf_regexp.PATH_EMPTY +.. data:: rfc3986.abnf_regexp.PATH_ROOTLESS +.. data:: rfc3986.abnf_regexp.PATH_NOSCHEME +.. data:: rfc3986.abnf_regexp.PATH_ABSOLUTE +.. data:: rfc3986.abnf_regexp.PATH_ABEMPTY + + Components of the :data:`rfc3986.abnf_regexp.PATH_RE`. + + See :rfc:`3986#section-3.3`. + +.. data:: rfc3986.abnf_regexp.QUERY_RE + + Regular expression to parse and validate the query component of a URI. + +.. data:: rfc3986.abnf_regexp.FRAGMENT_RE + + Regular expression to parse and validate the fragment component of a URI. + +.. data:: rfc3986.abnf_regexp.RELATIVE_PART_RE + + Regular expression to parse the relative URI when resolving URIs. + +.. data:: rfc3986.abnf_regexp.HIER_PART_RE + + The hierarchical part of a URI. This regular expression is used when + resolving relative URIs. + + See :rfc:`3986#section-3`. + +.. module:: rfc3986.misc + +.. data:: rfc3986.misc.URI_MATCHER + + Compiled version of :data:`rfc3986.abnf_regexp.URL_PARSING_RE`. + +.. data:: rfc3986.misc.SUBAUTHORITY_MATCHER + + Compiled combination of :data:`rfc3986.abnf_regexp.USERINFO_RE`, + :data:`rfc3986.abnf_regexp.HOST_PATTERN`, + :data:`rfc3986.abnf_regexp.PORT_RE`. + +.. data:: rfc3986.misc.SCHEME_MATCHER + + Compiled version of :data:`rfc3986.abnf_regexp.SCHEME_RE`. + +.. data:: rfc3986.misc.IPv4_MATCHER + + Compiled version of :data:`rfc3986.abnf_regexp.IPv4_RE`. + +.. 
data:: rfc3986.misc.PATH_MATCHER + + Compiled version of :data:`rfc3986.abnf_regexp.PATH_RE`. + +.. data:: rfc3986.misc.QUERY_MATCHER + + Compiled version of :data:`rfc3986.abnf_regexp.QUERY_RE`. + +.. data:: rfc3986.misc.RELATIVE_REF_MATCHER + + Compiled compilation of :data:`rfc3986.abnf_regexp.SCHEME_RE`, + :data:`rfc3986.abnf_regexp.HIER_PART_RE`, + :data:`rfc3986.abnf_regexp.QUERY_RE`. diff --git a/src/rfc3986/abnf_regexp.py b/src/rfc3986/abnf_regexp.py index 183afc8..adef09a 100644 --- a/src/rfc3986/abnf_regexp.py +++ b/src/rfc3986/abnf_regexp.py @@ -20,7 +20,7 @@ GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS) SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;=" SUB_DELIMITERS_SET = set(SUB_DELIMITERS) # Escape the '*' for use in regular expressions -RE_SUB_DELIMITERS = "!$&'()\*+,;=" +SUB_DELIMITERS_RE = "!$&'()\*+,;=" RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET) ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' DIGIT = '0123456789' @@ -29,24 +29,24 @@ UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + '._!-' UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS) NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET).union('%') # We need to escape the '-' in this case: -RE_UNRESERVED = 'A-Za-z0-9._~\-' +UNRESERVED_RE = 'A-Za-z0-9._~\-' # NOTE(sigmavirus24): We're going to use more strict regular expressions # than appear in Appendix B for scheme. This will prevent over-eager # consuming of items that aren't schemes. 
SCHEME_RE = '[a-zA-Z][a-zA-Z0-9+.-]*' -AUTHORITY_RE = '[^/?#]*' -PATH_RE = '[^?#]*' -QUERY_RE = '[^#]*' -FRAGMENT_RE = '.*' +_AUTHORITY_RE = '[^/?#]*' +_PATH_RE = '[^?#]*' +_QUERY_RE = '[^#]*' +_FRAGMENT_RE = '.*' # Extracted from http://tools.ietf.org/html/rfc3986#appendix-B COMPONENT_PATTERN_DICT = { 'scheme': SCHEME_RE, - 'authority': AUTHORITY_RE, - 'path': PATH_RE, - 'query': QUERY_RE, - 'fragment': FRAGMENT_RE, + 'authority': _AUTHORITY_RE, + 'path': _PATH_RE, + 'query': _QUERY_RE, + 'fragment': _FRAGMENT_RE, } # See http://tools.ietf.org/html/rfc3986#appendix-B @@ -68,7 +68,7 @@ URL_PARSING_RE = ( # Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2 # The pattern for a regular name, e.g., www.google.com, api.github.com REGULAR_NAME_RE = REG_NAME = '(({0})*|[{1}]*)'.format( - '%[0-9A-Fa-f]{2}', RE_SUB_DELIMITERS + RE_UNRESERVED + '%[0-9A-Fa-f]{2}', SUB_DELIMITERS_RE + UNRESERVED_RE ) # The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1, IPv4_RE = '([0-9]{1,3}.){3}[0-9]{1,3}' @@ -108,7 +108,7 @@ IPv6_RE = '(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7}))'.format( ) IPv_FUTURE_RE = 'v[0-9A-Fa-f]+.[%s]+' % ( - RE_UNRESERVED + RE_SUB_DELIMITERS + ':' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':' ) IP_LITERAL_RE = '\[({0}|{1})\]'.format(IPv6_RE, IPv_FUTURE_RE) @@ -131,7 +131,7 @@ PORT_RE = '[0-9]{1,5}' # Percent encoded character values PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}' -PCHAR = '([' + RE_UNRESERVED + RE_SUB_DELIMITERS + ':@]|%s)' % PCT_ENCODED +PCHAR = '([' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':@]|%s)' % PCT_ENCODED segments = { 'segment': PCHAR + '*', # Non-zero length segment @@ -151,7 +151,7 @@ PATH_RE = '^(%s|%s|%s|%s|%s)$' % ( ) FRAGMENT_RE = QUERY_RE = ( - '^([/?:@' + RE_UNRESERVED + RE_SUB_DELIMITERS + ']|%s)*$' % PCT_ENCODED + '^([/?:@' + UNRESERVED_RE + SUB_DELIMITERS_RE + ']|%s)*$' % PCT_ENCODED ) # ########################## diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index 90d32ed..2e9b8f1 
100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -31,11 +31,12 @@ important_characters = { 'generic_delimiters': abnf_regexp.GENERIC_DELIMITERS, 'sub_delimiters': abnf_regexp.SUB_DELIMITERS, # We need to escape the '*' in this case - 're_sub_delimiters': abnf_regexp.RE_SUB_DELIMITERS, + 're_sub_delimiters': abnf_regexp.SUB_DELIMITERS_RE, 'unreserved_chars': abnf_regexp.UNRESERVED_CHARS, # We need to escape the '-' in this case: - 're_unreserved': abnf_regexp.RE_UNRESERVED, - } + 're_unreserved': abnf_regexp.UNRESERVED_RE, +} + # For details about delimiters and reserved characters, see: # http://tools.ietf.org/html/rfc3986#section-2.2 GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET @@ -96,3 +97,6 @@ def merge_paths(base_uri, relative_path): path = base_uri.path or '' index = path.rfind('/') return path[:index] + '/' + relative_path + + +UseExisting = object() diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index 98dc437..93694a5 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -412,8 +412,9 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): result_list.extend(['#', self.fragment]) return ''.join(result_list) - def copy_with(self, scheme=None, authority=None, path=None, query=None, - fragment=None): + def copy_with(self, scheme=misc.UseExisting, authority=misc.UseExisting, + path=misc.UseExisting, query=misc.UseExisting, + fragment=misc.UseExisting): """Create a copy of this reference with the new components. 
:param str scheme: @@ -439,7 +440,7 @@ class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): 'fragment': fragment, } for key, value in list(attributes.items()): - if value is None: + if value is misc.UseExisting: del attributes[key] uri = self._replace(**attributes) uri.encoding = self.encoding From 432f66d1a5d9f5c35a2dda220b8d410f208a3225 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sun, 7 May 2017 07:45:42 -0500 Subject: [PATCH 31/34] Switch over parseresult to UseExisting Document how to remove components --- docs/source/user/parsing.rst | 13 +++++++++++++ src/rfc3986/parseresult.py | 17 +++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/docs/source/user/parsing.rst b/docs/source/user/parsing.rst index 1682ed2..5ec39cf 100644 --- a/docs/source/user/parsing.rst +++ b/docs/source/user/parsing.rst @@ -92,6 +92,19 @@ We can do similar things with URI References as well. However, URI References may have some unexpected behaviour based strictly on the RFC. +Finally, if you want to remove a component from a URI, you may pass ``None`` +to remove it, for example: + +.. testcode:: ex3 + + print(uri.copy_with(path=None).unsplit()) + +.. testoutput:: ex3 + + https://github.com + +This will work on both URI References and Parse Results. + And Now For Something Slightly Unusual ====================================== diff --git a/src/rfc3986/parseresult.py b/src/rfc3986/parseresult.py index 838a0c3..4fdd50a 100644 --- a/src/rfc3986/parseresult.py +++ b/src/rfc3986/parseresult.py @@ -17,6 +17,7 @@ from collections import namedtuple from . import compat from . import exceptions +from . import misc from . import normalizers from . 
import uri @@ -150,14 +151,16 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), """Return the normalized authority.""" return self.reference.authority - def copy_with(self, scheme=None, userinfo=None, host=None, port=None, - path=None, query=None, fragment=None): + def copy_with(self, scheme=misc.UseExisting, userinfo=misc.UseExisting, + host=misc.UseExisting, port=misc.UseExisting, + path=misc.UseExisting, query=misc.UseExisting, + fragment=misc.UseExisting): """Create a copy of this instance replacing with specified parts.""" attributes = zip(PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment)) attrs_dict = {} for name, value in attributes: - if value is None: + if value is misc.UseExisting: value = getattr(self, name) attrs_dict[name] = value authority = self._generate_authority(attrs_dict) @@ -283,14 +286,16 @@ class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS), """Return the normalized authority.""" return self.reference.authority.encode(self.encoding) - def copy_with(self, scheme=None, userinfo=None, host=None, port=None, - path=None, query=None, fragment=None, lazy_normalize=True): + def copy_with(self, scheme=misc.UseExisting, userinfo=misc.UseExisting, + host=misc.UseExisting, port=misc.UseExisting, + path=misc.UseExisting, query=misc.UseExisting, + fragment=misc.UseExisting, lazy_normalize=True): """Create a copy of this instance replacing with specified parts.""" attributes = zip(PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment)) attrs_dict = {} for name, value in attributes: - if value is None: + if value is misc.UseExisting: value = getattr(self, name) if not isinstance(value, bytes) and hasattr(value, 'encode'): value = value.encode(self.encoding) From 21a1be2188aa3588e6bedbfd55d0c58e4b0718a5 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Mon, 8 May 2017 07:04:27 -0500 Subject: [PATCH 32/34] Add support for Zone Identifiers from RFC 6874 Refs #2 --- 
docs/source/index.rst | 3 ++- src/rfc3986/abnf_regexp.py | 19 ++++++++++++++----- src/rfc3986/misc.py | 2 +- tests/conftest.py | 22 ++++++++++++++++++---- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index ce24f07..483dee0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,7 +3,8 @@ ========= |rfc3986| is a Python implementation of :rfc:`3986` including validation and -authority parsing. +authority parsing. This module also supports :rfc:`6874` which adds support +for zone identifiers to IPv6 Addresses. The maintainers strongly suggest using `pip`_ to install |rfc3986|. For example, diff --git a/src/rfc3986/abnf_regexp.py b/src/rfc3986/abnf_regexp.py index adef09a..90bd1e4 100644 --- a/src/rfc3986/abnf_regexp.py +++ b/src/rfc3986/abnf_regexp.py @@ -31,6 +31,10 @@ NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET).union('%') # We need to escape the '-' in this case: UNRESERVED_RE = 'A-Za-z0-9._~\-' +# Percent encoded character values +PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}' +PCHAR = '([' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':@]|%s)' % PCT_ENCODED + # NOTE(sigmavirus24): We're going to use more strict regular expressions # than appear in Appendix B for scheme. This will prevent over-eager # consuming of items that aren't schemes. 
@@ -111,7 +115,16 @@ IPv_FUTURE_RE = 'v[0-9A-Fa-f]+.[%s]+' % ( UNRESERVED_RE + SUB_DELIMITERS_RE + ':' ) -IP_LITERAL_RE = '\[({0}|{1})\]'.format(IPv6_RE, IPv_FUTURE_RE) + +# RFC 6874 Zone ID ABNF +ZONE_ID = '(?:[' + UNRESERVED_RE + ']|' + PCT_ENCODED + ')+' +IPv6_ADDRZ_RE = IPv6_RE + '%25' + ZONE_ID + +IP_LITERAL_RE = '\[({0}|(?:{1})|{2})\]'.format( + IPv6_RE, + IPv6_ADDRZ_RE, + IPv_FUTURE_RE, +) # Pattern for matching the host piece of the authority HOST_RE = HOST_PATTERN = '({0}|{1}|{2})'.format( @@ -128,10 +141,6 @@ PORT_RE = '[0-9]{1,5}' # See http://tools.ietf.org/html/rfc3986#section-3.3 for more information # about the path patterns defined below. - -# Percent encoded character values -PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}' -PCHAR = '([' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':@]|%s)' % PCT_ENCODED segments = { 'segment': PCHAR + '*', # Non-zero length segment diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index 2e9b8f1..9eaf064 100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -51,7 +51,7 @@ URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE) SUBAUTHORITY_MATCHER = re.compile(( '^(?:(?P{0})@)?' 
# userinfo - '(?P{1}?)' # host + '(?P{1})' # host ':?(?P{2})?$' # port ).format(abnf_regexp.USERINFO_RE, abnf_regexp.HOST_PATTERN, diff --git a/tests/conftest.py b/tests/conftest.py index 6474931..7358b9e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,16 +7,30 @@ import pytest SNOWMAN = b'\xe2\x98\x83' valid_hosts = [ - '[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A]', '[::1]', - '[21DA:D3:0:2F3B:2AA:FF:FE28:9C5A]', '[FE80::2AA:FF:FE9A:4CA2]', - '[FF02::2]', '[FF02:3::5]', '[FF02:0:0:0:0:0:0:2]', - '[FF02:30:0:0:0:0:0:5]', '127.0.0.1', 'www.example.com', 'localhost', + '[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A]', + '[::1]', + '[::1%25lo]', # With ZoneID + '[FF02:0:0:0:0:0:0:2%25en01]', # With ZoneID + '[FF02:30:0:0:0:0:0:5%25en1]', # With ZoneID + '[21DA:D3:0:2F3B:2AA:FF:FE28:9C5A]', + '[FE80::2AA:FF:FE9A:4CA2]', + '[FF02::2]', + '[FF02:3::5]', + '[FF02:0:0:0:0:0:0:2]', + '[FF02:30:0:0:0:0:0:5]', + '127.0.0.1', + 'www.example.com', + 'localhost', 'http-bin.org', ] invalid_hosts = [ '[FF02::3::5]', # IPv6 can only have one :: '[FADF:01]', # Not properly compacted (missing a :) + '[FADF:01%en0]', # Not properly compacted (missing a :), Invalid ZoneID + '[FADF::01%en0]', # Invalid ZoneID separator + '[FADF::01%]', # Invalid ZoneID separator and no ZoneID + '[FADF::01%25]', # Missing ZoneID 'localhost:80:80:80', # Too many ports '256.256.256.256', # Invalid IPv4 Address SNOWMAN.decode('utf-8') From d1fafcc2c16059e5e11340297740eaf97f219469 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Wed, 10 May 2017 06:53:28 -0500 Subject: [PATCH 33/34] Add release notes to our documentation --- HISTORY.rst | 63 ----------------------------- MANIFEST.in | 3 +- docs/source/index.rst | 1 + docs/source/release-notes/0.1.0.rst | 4 ++ docs/source/release-notes/0.2.0.rst | 7 ++++ docs/source/release-notes/0.2.1.rst | 9 +++++ docs/source/release-notes/0.2.2.rst | 7 ++++ docs/source/release-notes/0.3.0.rst | 7 ++++ docs/source/release-notes/0.3.1.rst | 4 ++ 
docs/source/release-notes/0.4.0.rst | 8 ++++ docs/source/release-notes/0.4.1.rst | 5 +++ docs/source/release-notes/0.4.2.rst | 5 +++ docs/source/release-notes/1.0.0.rst | 26 ++++++++++++ docs/source/release-notes/index.rst | 28 +++++++++++++ setup.py | 5 +-- 15 files changed, 113 insertions(+), 69 deletions(-) delete mode 100644 HISTORY.rst create mode 100644 docs/source/release-notes/0.1.0.rst create mode 100644 docs/source/release-notes/0.2.0.rst create mode 100644 docs/source/release-notes/0.2.1.rst create mode 100644 docs/source/release-notes/0.2.2.rst create mode 100644 docs/source/release-notes/0.3.0.rst create mode 100644 docs/source/release-notes/0.3.1.rst create mode 100644 docs/source/release-notes/0.4.0.rst create mode 100644 docs/source/release-notes/0.4.1.rst create mode 100644 docs/source/release-notes/0.4.2.rst create mode 100644 docs/source/release-notes/1.0.0.rst create mode 100644 docs/source/release-notes/index.rst diff --git a/HISTORY.rst b/HISTORY.rst deleted file mode 100644 index b756066..0000000 --- a/HISTORY.rst +++ /dev/null @@ -1,63 +0,0 @@ -0.4.2 -- 2016-08-22 -------------------- - -- Avoid parsing an string with just an IPv6 address as having a scheme of - ``[``. 
- -0.4.1 -- 2016-08-22 -------------------- - -- Normalize URIs constructed using ``ParseResult.from_parts`` and - ``ParseResultBytes.from_parts`` - -0.4.0 -- 2016-08-20 -------------------- - -- Add ``ParseResult.from_parts`` and ``ParseResultBytes.from_parts`` class - methods to easily create a ParseResult - -- When using regular expressions, use ``[0-9]`` instead of ``\d`` to avoid - finding ports with "numerals" that are not valid in a port - -0.3.1 -- 2015-12-15 -------------------- - -- Preserve empty query strings during normalization - -0.3.0 -- 2015-10-20 -------------------- - -- Read README and HISTORY files using the appropriate codec so rfc3986 can be - installed on systems with locale's other than utf-8 (specifically C) - -- Replace the standard library's urlparse behaviour - -0.2.2 -- 2015-05-27 -------------------- - -- Update the regular name regular expression to accept all of the characters - allowed in the RFC. Closes bug #11 (Thanks Viktor Haag). Previously URIs - similar to "http://http-bin.org" would be considered invalid. - -0.2.1 -- 2015-03-20 -------------------- - -- Check that the bytes of an IPv4 Host Address are within the valid range. - Otherwise, URIs like "http://256.255.255.0/v1/resource" are considered - valid. - -- Add 6 to the list of unreserved characters. It was previously missing. - Closes bug #9 - -0.2.0 -- 2014-06-30 -------------------- - -- Add support for requiring components during validation. This includes adding - parameters ``require_scheme``, ``require_authority``, ``require_path``, - ``require_path``, ``require_query``, and ``require_fragment`` to - ``rfc3986.is_valid_uri`` and ``URIReference#is_valid``. 
- -0.1.0 -- 2014-06-27 -------------------- - -- Initial Release includes validation and normalization of URIs diff --git a/MANIFEST.in b/MANIFEST.in index 74148bc..4cf3d01 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,8 @@ include README.rst include LICENSE -include HISTORY.rst include AUTHORS.rst include setup.cfg prune *.pyc -#recursive-include docs *.rst *.py Makefile +recursive-include docs *.rst *.py recursive-include tests *.py prune docs/_build diff --git a/docs/source/index.rst b/docs/source/index.rst index 483dee0..757a427 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -21,6 +21,7 @@ example, narrative api-ref/index + release-notes/index .. links diff --git a/docs/source/release-notes/0.1.0.rst b/docs/source/release-notes/0.1.0.rst new file mode 100644 index 0000000..599ac8c --- /dev/null +++ b/docs/source/release-notes/0.1.0.rst @@ -0,0 +1,4 @@ +0.1.0 -- 2014-06-27 +------------------- + +- Initial Release includes validation and normalization of URIs diff --git a/docs/source/release-notes/0.2.0.rst b/docs/source/release-notes/0.2.0.rst new file mode 100644 index 0000000..dab34a7 --- /dev/null +++ b/docs/source/release-notes/0.2.0.rst @@ -0,0 +1,7 @@ +0.2.0 -- 2014-06-30 +------------------- + +- Add support for requiring components during validation. This includes adding + parameters ``require_scheme``, ``require_authority``, ``require_path``, + ``require_query``, and ``require_fragment`` to + ``rfc3986.is_valid_uri`` and ``URIReference#is_valid``. diff --git a/docs/source/release-notes/0.2.1.rst b/docs/source/release-notes/0.2.1.rst new file mode 100644 index 0000000..4706e44 --- /dev/null +++ b/docs/source/release-notes/0.2.1.rst @@ -0,0 +1,9 @@ +0.2.1 -- 2015-03-20 +------------------- + +- Check that the bytes of an IPv4 Host Address are within the valid range. + Otherwise, URIs like "http://256.255.255.0/v1/resource" are considered + valid. + +- Add 6 to the list of unreserved characters.
It was previously missing. + Closes bug #9 diff --git a/docs/source/release-notes/0.2.2.rst b/docs/source/release-notes/0.2.2.rst new file mode 100644 index 0000000..2ad7815 --- /dev/null +++ b/docs/source/release-notes/0.2.2.rst @@ -0,0 +1,7 @@ +0.2.2 -- 2015-05-27 +------------------- + +- Update the regular name regular expression to accept all of the characters + allowed in the RFC. Closes bug #11 (Thanks Viktor Haag). Previously URIs + similar to "http://http-bin.org" would be considered invalid. + diff --git a/docs/source/release-notes/0.3.0.rst b/docs/source/release-notes/0.3.0.rst new file mode 100644 index 0000000..9a1954f --- /dev/null +++ b/docs/source/release-notes/0.3.0.rst @@ -0,0 +1,7 @@ +0.3.0 -- 2015-10-20 +------------------- + +- Read README and HISTORY files using the appropriate codec so rfc3986 can be + installed on systems with locales other than utf-8 (specifically C) + +- Replace the standard library's urlparse behaviour diff --git a/docs/source/release-notes/0.3.1.rst b/docs/source/release-notes/0.3.1.rst new file mode 100644 index 0000000..579368e --- /dev/null +++ b/docs/source/release-notes/0.3.1.rst @@ -0,0 +1,4 @@ +0.3.1 -- 2015-12-15 +------------------- + +- Preserve empty query strings during normalization diff --git a/docs/source/release-notes/0.4.0.rst b/docs/source/release-notes/0.4.0.rst new file mode 100644 index 0000000..135af29 --- /dev/null +++ b/docs/source/release-notes/0.4.0.rst @@ -0,0 +1,8 @@ +0.4.0 -- 2016-08-20 +------------------- + +- Add ``ParseResult.from_parts`` and ``ParseResultBytes.from_parts`` class + methods to easily create a ParseResult + +- When using regular expressions, use ``[0-9]`` instead of ``\d`` to avoid + finding ports with "numerals" that are not valid in a port diff --git a/docs/source/release-notes/0.4.1.rst b/docs/source/release-notes/0.4.1.rst new file mode 100644 index 0000000..9bda6b4 --- /dev/null +++ b/docs/source/release-notes/0.4.1.rst @@ -0,0 +1,5 @@ +0.4.1 -- 2016-08-22
+------------------- + +- Normalize URIs constructed using ``ParseResult.from_parts`` and + ``ParseResultBytes.from_parts`` diff --git a/docs/source/release-notes/0.4.2.rst b/docs/source/release-notes/0.4.2.rst new file mode 100644 index 0000000..96d88b3 --- /dev/null +++ b/docs/source/release-notes/0.4.2.rst @@ -0,0 +1,5 @@ +0.4.2 -- 2016-08-22 +------------------- + +- Avoid parsing a string with just an IPv6 address as having a scheme of + ``[``. diff --git a/docs/source/release-notes/1.0.0.rst b/docs/source/release-notes/1.0.0.rst new file mode 100644 index 0000000..3862001 --- /dev/null +++ b/docs/source/release-notes/1.0.0.rst @@ -0,0 +1,26 @@ +1.0.0 -- 2017-05-10 +------------------- + +- Add support for :rfc:`6874` - Zone Identifiers in IPv6 Addresses + + See also `issue #2`_ + +- Add a more flexible and usable validation framework. See our documentation + for more information. + +- Add an object to aid in building new URIs from scratch. See our + documentation for more information. + +- Add real documentation for the entire module. + +- Add separate submodule with documented regular expression strings for the + collected ABNF. + +- Allow ``None`` to be used to eliminate components via ``copy_with`` for URIs + and ParseResults. + +- Move release history into our documentation. + +.. links +.. _issue #2: + https://github.com/sigmavirus24/rfc3986/issues/2 diff --git a/docs/source/release-notes/index.rst b/docs/source/release-notes/index.rst new file mode 100644 index 0000000..9db3f71 --- /dev/null +++ b/docs/source/release-notes/index.rst @@ -0,0 +1,28 @@ +=========================== + Release Notes and History +=========================== + +All of the release notes that have been recorded for |rfc3986| are organized +here with the newest releases first. + +1.x Release Series +================== + +.. toctree:: + + 1.0.0 + +0.x Release Series +================== + +..
toctree:: + + 0.4.2 + 0.4.1 + 0.4.0 + 0.3.1 + 0.3.0 + 0.2.2 + 0.2.1 + 0.2.0 + 0.1.0 diff --git a/setup.py b/setup.py index b1173ea..0a05aeb 100755 --- a/setup.py +++ b/setup.py @@ -16,14 +16,11 @@ packages = [ with io.open('README.rst', encoding='utf-8') as f: readme = f.read() -with io.open('HISTORY.rst', encoding='utf-8') as f: - history = f.read() - setuptools.setup( name='rfc3986', version=rfc3986.__version__, description='Validating URI References per RFC 3986', - long_description=readme + '\n\n' + history, + long_description=readme, author='Ian Cordasco', author_email='graffatcolmingov@gmail.com', url='http://rfc3986.readthedocs.io', From b075f2651e9a85e81a3f7c9bc74c7711c6df87bb Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Wed, 10 May 2017 06:54:24 -0500 Subject: [PATCH 34/34] Finalize 1.0.0 version string --- src/rfc3986/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rfc3986/__init__.py b/src/rfc3986/__init__.py index 89e7b97..81bb9c4 100644 --- a/src/rfc3986/__init__.py +++ b/src/rfc3986/__init__.py @@ -34,7 +34,7 @@ __author__ = 'Ian Cordasco' __author_email__ = 'graffatcolmingov@gmail.com' __license__ = 'Apache v2.0' __copyright__ = 'Copyright 2014 Rackspace' -__version__ = '1.0.0.0b0' +__version__ = '1.0.0' __all__ = ( 'ParseResult',