diff --git a/.travis.yml b/.travis.yml index 7f51e25..510237d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,8 +7,6 @@ script: tox matrix: include: - - python: 2.6 - env: TOXENV=py26 - python: 2.7 env: TOXENV=py27 - python: 3.3 @@ -19,10 +17,8 @@ matrix: env: TOXENV=py35 - python: pypy env: TOXENV=pypy - - python: 2.7 - env: TOXENV=py27-flake8 - - python: 3.4 - env: TOXENV=py34-flake8 + - python: 3.5 + env: TOXENV=flake8 #- env: TOXENV=docs notifications: diff --git a/HISTORY.rst b/HISTORY.rst deleted file mode 100644 index b756066..0000000 --- a/HISTORY.rst +++ /dev/null @@ -1,63 +0,0 @@ -0.4.2 -- 2016-08-22 -------------------- - -- Avoid parsing an string with just an IPv6 address as having a scheme of - ``[``. - -0.4.1 -- 2016-08-22 -------------------- - -- Normalize URIs constructed using ``ParseResult.from_parts`` and - ``ParseResultBytes.from_parts`` - -0.4.0 -- 2016-08-20 -------------------- - -- Add ``ParseResult.from_parts`` and ``ParseResultBytes.from_parts`` class - methods to easily create a ParseResult - -- When using regular expressions, use ``[0-9]`` instead of ``\d`` to avoid - finding ports with "numerals" that are not valid in a port - -0.3.1 -- 2015-12-15 -------------------- - -- Preserve empty query strings during normalization - -0.3.0 -- 2015-10-20 -------------------- - -- Read README and HISTORY files using the appropriate codec so rfc3986 can be - installed on systems with locale's other than utf-8 (specifically C) - -- Replace the standard library's urlparse behaviour - -0.2.2 -- 2015-05-27 -------------------- - -- Update the regular name regular expression to accept all of the characters - allowed in the RFC. Closes bug #11 (Thanks Viktor Haag). Previously URIs - similar to "http://http-bin.org" would be considered invalid. - -0.2.1 -- 2015-03-20 -------------------- - -- Check that the bytes of an IPv4 Host Address are within the valid range. - Otherwise, URIs like "http://256.255.255.0/v1/resource" are considered - valid. 
- -- Add 6 to the list of unreserved characters. It was previously missing. - Closes bug #9 - -0.2.0 -- 2014-06-30 -------------------- - -- Add support for requiring components during validation. This includes adding - parameters ``require_scheme``, ``require_authority``, ``require_path``, - ``require_path``, ``require_query``, and ``require_fragment`` to - ``rfc3986.is_valid_uri`` and ``URIReference#is_valid``. - -0.1.0 -- 2014-06-27 -------------------- - -- Initial Release includes validation and normalization of URIs diff --git a/MANIFEST.in b/MANIFEST.in index 74148bc..4cf3d01 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,8 @@ include README.rst include LICENSE -include HISTORY.rst include AUTHORS.rst include setup.cfg prune *.pyc -#recursive-include docs *.rst *.py Makefile +recursive-include docs *.rst *.py recursive-include tests *.py prune docs/_build diff --git a/README.rst b/README.rst index 62e4974..0d3dcb6 100644 --- a/README.rst +++ b/README.rst @@ -2,12 +2,12 @@ rfc3986 ======= A Python implementation of `RFC 3986`_ including validation and authority -parsing. Coming soon: `Reference Resolution `_. +parsing. Installation ------------ -Simply use pip to install ``rfc3986`` like so:: +Use pip to install ``rfc3986`` like so:: pip install rfc3986 diff --git a/docs/source/api-ref/api.rst b/docs/source/api-ref/api.rst new file mode 100644 index 0000000..646dc24 --- /dev/null +++ b/docs/source/api-ref/api.rst @@ -0,0 +1,9 @@ +=============== + API Submodule +=============== + +.. autofunction:: rfc3986.api.urlparse + +.. autofunction:: rfc3986.api.uri_reference + +.. autofunction:: rfc3986.api.normalize_uri diff --git a/docs/source/api-ref/builder.rst b/docs/source/api-ref/builder.rst new file mode 100644 index 0000000..a77ec53 --- /dev/null +++ b/docs/source/api-ref/builder.rst @@ -0,0 +1,23 @@ +==================== + URI Builder Module +==================== + +.. autoclass:: rfc3986.builder.URIBuilder + +.. 
automethod:: rfc3986.builder.URIBuilder.add_scheme + +.. automethod:: rfc3986.builder.URIBuilder.add_credentials + +.. automethod:: rfc3986.builder.URIBuilder.add_host + +.. automethod:: rfc3986.builder.URIBuilder.add_port + +.. automethod:: rfc3986.builder.URIBuilder.add_path + +.. automethod:: rfc3986.builder.URIBuilder.add_query_from + +.. automethod:: rfc3986.builder.URIBuilder.add_query + +.. automethod:: rfc3986.builder.URIBuilder.add_fragment + +.. automethod:: rfc3986.builder.URIBuilder.finalize diff --git a/docs/source/api-ref/index.rst b/docs/source/api-ref/index.rst new file mode 100644 index 0000000..7c0f58c --- /dev/null +++ b/docs/source/api-ref/index.rst @@ -0,0 +1,16 @@ +=============== + API Reference +=============== + +This section contains API documentation generated from the source code of +|rfc3986|. If you're looking for an introduction to the module and how it +can be utilized, please see :ref:`narrative` instead. + +.. toctree:: + :maxdepth: 1 + + api + builder + uri + validators + miscellaneous diff --git a/docs/source/api-ref/miscellaneous.rst b/docs/source/api-ref/miscellaneous.rst new file mode 100644 index 0000000..08ea7ff --- /dev/null +++ b/docs/source/api-ref/miscellaneous.rst @@ -0,0 +1,231 @@ +========================== + Miscellaneous Submodules +========================== + +There are several submodules in |rfc3986| that are not meant to be exposed to +users directly but which are valuable to document, regardless. + +.. data:: rfc3986.misc.UseExisting + + A sentinel object to make certain APIs simpler for users. + +.. module:: rfc3986.abnf_regexp + +The :mod:`rfc3986.abnf_regexp` module contains the regular expressions written +from the RFC's ABNF. The :mod:`rfc3986.misc` module contains compiled regular +expressions from :mod:`rfc3986.abnf_regexp` and previously contained those +regular expressions. + +.. data:: rfc3986.abnf_regexp.GEN_DELIMS +.. 
data:: rfc3986.abnf_regexp.GENERIC_DELIMITERS + + The string containing all of the generic delimiters as defined on + `page 13 `__. + +.. data:: rfc3986.abnf_regexp.GENERIC_DELIMITERS_SET + + :data:`rfc3986.abnf_regexp.GEN_DELIMS` represented as a :class:`set`. + +.. data:: rfc3986.abnf_regexp.SUB_DELIMS +.. data:: rfc3986.abnf_regexp.SUB_DELIMITERS + + The string containing all of the 'sub' delimiters as defined on + `page 13 `__. + +.. data:: rfc3986.abnf_regexp.SUB_DELIMITERS_SET + + :data:`rfc3986.abnf_regexp.SUB_DELIMS` represented as a :class:`set`. + +.. data:: rfc3986.abnf_regexp.SUB_DELIMITERS_RE + + :data:`rfc3986.abnf_regexp.SUB_DELIMS` with the ``*`` escaped for use in + regular expressions. + +.. data:: rfc3986.abnf_regexp.RESERVED_CHARS_SET + + A :class:`set` constructed of :data:`GEN_DELIMS` and :data:`SUB_DELIMS`. + This union is defined on `page 13 + `__. + +.. data:: rfc3986.abnf_regexp.ALPHA + + The string of upper- and lower-case letters in USASCII. + +.. data:: rfc3986.abnf_regexp.DIGIT + + The string of digits 0 through 9. + +.. data:: rfc3986.abnf_regexp.UNRESERVED +.. data:: rfc3986.abnf_regexp.UNRESERVED_CHARS + + The string of unreserved characters defined in :rfc:`3986#section-2.3`. + +.. data:: rfc3986.abnf_regexp.UNRESERVED_CHARS_SET + + :data:`rfc3986.abnf_regexp.UNRESERVED_CHARS` represented as a + :class:`set`. + +.. data:: rfc3986.abnf_regexp.NON_PCT_ENCODED_SET + + The non-percent encoded characters represented as a :class:`set`. + +.. data:: rfc3986.abnf_regexp.UNRESERVED_RE + + Optimized regular expression for unreserved characters. + +.. data:: rfc3986.abnf_regexp.SCHEME_RE + + Stricter regular expression to match and validate the scheme part + of a URI. + +.. data:: rfc3986.abnf_regexp.COMPONENT_PATTERN_DICT + + Dictionary with regular expressions to match various components in + a URI. Except for :data:`rfc3986.abnf_regexp.SCHEME_RE`, all patterns + are from :rfc:`3986#appendix-B`. + +.. 
data:: rfc3986.abnf_regexp.URL_PARSING_RE + + Regular expression composed from the components in + :data:`rfc3986.abnf_regexp.COMPONENT_PATTERN_DICT`. + +.. data:: rfc3986.abnf_regexp.HEXDIG_RE + + Hexadecimal characters used in each piece of an IPv6 address. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.LS32_RE + + Least significant 32 bits of an IPv6 address. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.REG_NAME +.. data:: rfc3986.abnf_regexp.REGULAR_NAME_RE + + The pattern for a regular name, e.g., ``www.google.com``, + ``api.github.com``. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.IPv4_RE + + The pattern for an IPv4 address, e.g., ``192.168.255.255``. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.IPv6_RE + + The pattern for an IPv6 address, e.g., ``::1``. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.IPv_FUTURE_RE + + A regular expression to parse out IPv Futures. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.IP_LITERAL_RE + + Pattern to match IPv6 addresses and IPv Future addresses. + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.HOST_RE +.. data:: rfc3986.abnf_regexp.HOST_PATTERN + + Pattern to match and validate the host piece of an authority. + This is composed of + + - :data:`rfc3986.abnf_regexp.REG_NAME` + - :data:`rfc3986.abnf_regexp.IPv4_RE` + - :data:`rfc3986.abnf_regexp.IP_LITERAL_RE` + + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.USERINFO_RE + + Pattern to match and validate the user information portion of + an authority component. + + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.PORT_RE + + Pattern to match and validate the port portion of an authority + component. + + See :rfc:`3986#section-3.2.2`. + +.. data:: rfc3986.abnf_regexp.PCT_ENCODED +.. data:: rfc3986.abnf_regexp.PERCENT_ENCODED + + Regular expression to match percent encoded character values. + +.. 
data:: rfc3986.abnf_regexp.PCHAR + + Regular expression to match printable characters. + +.. data:: rfc3986.abnf_regexp.PATH_RE + + Regular expression to match and validate the path component of a URI. + + See :rfc:`3986#section-3.3`. + +.. data:: rfc3986.abnf_regexp.PATH_EMPTY +.. data:: rfc3986.abnf_regexp.PATH_ROOTLESS +.. data:: rfc3986.abnf_regexp.PATH_NOSCHEME +.. data:: rfc3986.abnf_regexp.PATH_ABSOLUTE +.. data:: rfc3986.abnf_regexp.PATH_ABEMPTY + + Components of the :data:`rfc3986.abnf_regexp.PATH_RE`. + + See :rfc:`3986#section-3.3`. + +.. data:: rfc3986.abnf_regexp.QUERY_RE + + Regular expression to parse and validate the query component of a URI. + +.. data:: rfc3986.abnf_regexp.FRAGMENT_RE + + Regular expression to parse and validate the fragment component of a URI. + +.. data:: rfc3986.abnf_regexp.RELATIVE_PART_RE + + Regular expression to parse the relative URI when resolving URIs. + +.. data:: rfc3986.abnf_regexp.HIER_PART_RE + + The hierarchical part of a URI. This regular expression is used when + resolving relative URIs. + + See :rfc:`3986#section-3`. + +.. module:: rfc3986.misc + +.. data:: rfc3986.misc.URI_MATCHER + + Compiled version of :data:`rfc3986.abnf_regexp.URL_PARSING_RE`. + +.. data:: rfc3986.misc.SUBAUTHORITY_MATCHER + + Compiled compilation of :data:`rfc3986.abnf_regexp.USERINFO_RE`, + :data:`rfc3986.abnf_regexp.HOST_PATTERN`, + :data:`rfc3986.abnf_regexp.PORT_RE`. + +.. data:: rfc3986.misc.SCHEME_MATCHER + + Compiled version of :data:`rfc3986.abnf_regexp.SCHEME_RE`. + +.. data:: rfc3986.misc.IPv4_MATCHER + + Compiled version of :data:`rfc3986.abnf_regexp.IPv4_RE`. + +.. data:: rfc3986.misc.PATH_MATCHER + + Compiled version of :data:`rfc3986.abnf_regexp.PATH_RE`. + +.. data:: rfc3986.misc.QUERY_MATCHER + + Compiled version of :data:`rfc3986.abnf_regexp.QUERY_RE`. + +.. 
data:: rfc3986.misc.RELATIVE_REF_MATCHER + + Compiled compilation of :data:`rfc3986.abnf_regexp.SCHEME_RE`, + :data:`rfc3986.abnf_regexp.HIER_PART_RE`, + :data:`rfc3986.abnf_regexp.QUERY_RE`. diff --git a/docs/source/api-ref/uri.rst b/docs/source/api-ref/uri.rst new file mode 100644 index 0000000..bedc9f4 --- /dev/null +++ b/docs/source/api-ref/uri.rst @@ -0,0 +1,19 @@ +=============== + URI Submodule +=============== + +.. autoclass:: rfc3986.uri.URIReference + +.. automethod:: rfc3986.uri.URIReference.from_string + +.. automethod:: rfc3986.uri.URIReference.unsplit + +.. automethod:: rfc3986.uri.URIReference.resolve_with + +.. automethod:: rfc3986.uri.URIReference.copy_with + +.. automethod:: rfc3986.uri.URIReference.normalize + +.. automethod:: rfc3986.uri.URIReference.is_absolute + +.. automethod:: rfc3986.uri.URIReference.authority_info diff --git a/docs/source/api-ref/validators.rst b/docs/source/api-ref/validators.rst new file mode 100644 index 0000000..eaf078f --- /dev/null +++ b/docs/source/api-ref/validators.rst @@ -0,0 +1,19 @@ +====================== + Validators Submodule +====================== + +.. autoclass:: rfc3986.validators.Validator + +.. automethod:: rfc3986.validators.Validator.allow_schemes + +.. automethod:: rfc3986.validators.Validator.allow_hosts + +.. automethod:: rfc3986.validators.Validator.allow_ports + +.. automethod:: rfc3986.validators.Validator.allow_use_of_password + +.. automethod:: rfc3986.validators.Validator.forbid_use_of_password + +.. automethod:: rfc3986.validators.Validator.require_presence_of + +.. automethod:: rfc3986.validators.Validator.validate diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..8900180 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,166 @@ +# -*- coding: utf-8 -*- +# +# rfc3986 documentation build configuration file, created by +# sphinx-quickstart on Tue Mar 14 07:06:46 2017. 
+# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' +rst_epilog = """ +.. |rfc3986| replace:: :mod:`rfc3986` +""" + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.coverage', + 'sphinx-prompt', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'rfc3986' +copyright = u'2017, Ian Cordasco' +author = u'Ian Cordasco' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = u'1.0.0' +# The full version, including alpha/beta/rc tags. +release = u'1.0.0' + +# The language for content autogenerated by Sphinx. 
Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'rfc3986doc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. 
List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'rfc3986.tex', u'rfc3986 Documentation', + u'Ian Cordasco', 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'rfc3986', u'rfc3986 Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'rfc3986', u'rfc3986 Documentation', + author, 'rfc3986', 'One line description of project.', + 'Miscellaneous'), +] + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'https://docs.python.org/': None} diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..757a427 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,29 @@ +========= + rfc3986 +========= + +|rfc3986| is a Python implementation of :rfc:`3986` including validation and +authority parsing. This module also supports :rfc:`6874` which adds support +for zone identifiers to IPv6 Addresses. + +The maintainers strongly suggest using `pip`_ to install |rfc3986|. For +example, + +.. prompt:: bash + + pip install rfc3986 + python -m pip install rfc3986 + python3.6 -m pip install rfc3986 + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + narrative + api-ref/index + release-notes/index + + +.. links +.. _pip: + https://pypi.python.org/pypi/pip/ diff --git a/docs/source/narrative.rst b/docs/source/narrative.rst new file mode 100644 index 0000000..1676c41 --- /dev/null +++ b/docs/source/narrative.rst @@ -0,0 +1,29 @@ +.. 
_narrative: + +==================== + User Documentation +==================== + +|rfc3986| has several API features and convenience methods. The core of +|rfc3986|'s API revolves around parsing, validating, and building URIs. + +There is an API to provide compatibility with :mod:`urllib.parse`, there is an +API to parse a URI as a URI Reference, there's an API to provide validation of +URIs, and finally there's an API to build URIs. + +.. note:: + + There's presently no support for IRIs as defined in :rfc:`3987`. + +|rfc3986| parses URIs much differently from :mod:`urllib.parse` so users may +see some subtle differences with very specific URLs that contain rough +edge cases. Regardless, we do our best to implement the same API so you should +be able to seamlessly swap |rfc3986| for ``urlparse``. + + +.. toctree:: + :maxdepth: 2 + + user/parsing + user/validating + user/building diff --git a/docs/source/release-notes/0.1.0.rst b/docs/source/release-notes/0.1.0.rst new file mode 100644 index 0000000..599ac8c --- /dev/null +++ b/docs/source/release-notes/0.1.0.rst @@ -0,0 +1,4 @@ +0.1.0 -- 2014-06-27 +------------------- + +- Initial Release includes validation and normalization of URIs diff --git a/docs/source/release-notes/0.2.0.rst b/docs/source/release-notes/0.2.0.rst new file mode 100644 index 0000000..dab34a7 --- /dev/null +++ b/docs/source/release-notes/0.2.0.rst @@ -0,0 +1,7 @@ +0.2.0 -- 2014-06-30 +------------------- + +- Add support for requiring components during validation. This includes adding + parameters ``require_scheme``, ``require_authority``, ``require_path``, + ``require_path``, ``require_query``, and ``require_fragment`` to + ``rfc3986.is_valid_uri`` and ``URIReference#is_valid``. 
diff --git a/docs/source/release-notes/0.2.1.rst b/docs/source/release-notes/0.2.1.rst new file mode 100644 index 0000000..4706e44 --- /dev/null +++ b/docs/source/release-notes/0.2.1.rst @@ -0,0 +1,9 @@ +0.2.1 -- 2015-03-20 +------------------- + +- Check that the bytes of an IPv4 Host Address are within the valid range. + Otherwise, URIs like "http://256.255.255.0/v1/resource" are considered + valid. + +- Add 6 to the list of unreserved characters. It was previously missing. + Closes bug #9 diff --git a/docs/source/release-notes/0.2.2.rst b/docs/source/release-notes/0.2.2.rst new file mode 100644 index 0000000..2ad7815 --- /dev/null +++ b/docs/source/release-notes/0.2.2.rst @@ -0,0 +1,7 @@ +0.2.2 -- 2015-05-27 +------------------- + +- Update the regular name regular expression to accept all of the characters + allowed in the RFC. Closes bug #11 (Thanks Viktor Haag). Previously URIs + similar to "http://http-bin.org" would be considered invalid. + diff --git a/docs/source/release-notes/0.3.0.rst b/docs/source/release-notes/0.3.0.rst new file mode 100644 index 0000000..9a1954f --- /dev/null +++ b/docs/source/release-notes/0.3.0.rst @@ -0,0 +1,7 @@ +0.3.0 -- 2015-10-20 +------------------- + +- Read README and HISTORY files using the appropriate codec so rfc3986 can be + installed on systems with locale's other than utf-8 (specifically C) + +- Replace the standard library's urlparse behaviour diff --git a/docs/source/release-notes/0.3.1.rst b/docs/source/release-notes/0.3.1.rst new file mode 100644 index 0000000..579368e --- /dev/null +++ b/docs/source/release-notes/0.3.1.rst @@ -0,0 +1,4 @@ +0.3.1 -- 2015-12-15 +------------------- + +- Preserve empty query strings during normalization diff --git a/docs/source/release-notes/0.4.0.rst b/docs/source/release-notes/0.4.0.rst new file mode 100644 index 0000000..135af29 --- /dev/null +++ b/docs/source/release-notes/0.4.0.rst @@ -0,0 +1,8 @@ +0.4.0 -- 2016-08-20 +------------------- + +- Add ``ParseResult.from_parts`` and 
``ParseResultBytes.from_parts`` class + methods to easily create a ParseResult + +- When using regular expressions, use ``[0-9]`` instead of ``\d`` to avoid + finding ports with "numerals" that are not valid in a port diff --git a/docs/source/release-notes/0.4.1.rst b/docs/source/release-notes/0.4.1.rst new file mode 100644 index 0000000..9bda6b4 --- /dev/null +++ b/docs/source/release-notes/0.4.1.rst @@ -0,0 +1,5 @@ +0.4.1 -- 2016-08-22 +------------------- + +- Normalize URIs constructed using ``ParseResult.from_parts`` and + ``ParseResultBytes.from_parts`` diff --git a/docs/source/release-notes/0.4.2.rst b/docs/source/release-notes/0.4.2.rst new file mode 100644 index 0000000..96d88b3 --- /dev/null +++ b/docs/source/release-notes/0.4.2.rst @@ -0,0 +1,5 @@ +0.4.2 -- 2016-08-22 +------------------- + +- Avoid parsing an string with just an IPv6 address as having a scheme of + ``[``. diff --git a/docs/source/release-notes/1.0.0.rst b/docs/source/release-notes/1.0.0.rst new file mode 100644 index 0000000..3862001 --- /dev/null +++ b/docs/source/release-notes/1.0.0.rst @@ -0,0 +1,26 @@ +1.0.0 -- 2017-05-10 +------------------- + +- Add support for :rfc:`6874` - Zone Identifiers in IPv6 Addresses + + See also `issue #2`_ + +- Add a more flexible and usable validation framework. See our documentation + for more information. + +- Add an object to aid in building new URIs from scratch. See our + documentation for more information. + +- Add real documentation for the entire module. + +- Add separate submodule with documented regular expression strings for the + collected ABNF. + +- Allow ``None`` to be used to eliminate components via ``copy_with`` for URIs + and ParseResults. + +- Move release history into our documentation. + +.. links +.. 
_issue #2: + https://github.com/sigmavirus24/rfc3986/issues/2 diff --git a/docs/source/release-notes/index.rst b/docs/source/release-notes/index.rst new file mode 100644 index 0000000..9db3f71 --- /dev/null +++ b/docs/source/release-notes/index.rst @@ -0,0 +1,28 @@ +=========================== + Release Notes and History +=========================== + +All of the release notes that have been recorded for |rfc3986| are organized +here with the newest releases first. + +1.x Release Series +================== + +.. toctree:: + + 1.0.0 + +0.x Release Series +================== + +.. toctree:: + + 0.4.2 + 0.4.1 + 0.4.0 + 0.3.1 + 0.3.0 + 0.2.2 + 0.2.1 + 0.2.0 + 0.1.0 diff --git a/docs/source/user/building.rst b/docs/source/user/building.rst new file mode 100644 index 0000000..8fe6a82 --- /dev/null +++ b/docs/source/user/building.rst @@ -0,0 +1,116 @@ +=============== + Building URIs +=============== + +Constructing URLs often seems simple. There are some problems with +concatenating strings to build a URL: + +- Certain parts of the URL disallow certain characters + +- Formatting some parts of the URL is tricky and doing it manually isn't fun + +To make the experience better |rfc3986| provides the +:class:`~rfc3986.builder.URIBuilder` class to generate valid +:class:`~rfc3986.uri.URIReference` instances. The +:class:`~rfc3986.builder.URIBuilder` class will handle ensuring that each +component is normalized and safe for real world use. + + +Example Usage +============= + +.. note:: + + All of the methods on a :class:`~rfc3986.builder.URIBuilder` are + chainable (except :meth:`~rfc3986.builder.URIBuilder.finalize`). + +Let's build a basic URL with just a scheme and host. First we create an +instance of :class:`~rfc3986.builder.URIBuilder`. Then we call +:meth:`~rfc3986.builder.URIBuilder.add_scheme` and +:meth:`~rfc3986.builder.URIBuilder.add_host` with the scheme and host +we want to include in the URL. 
Then we convert our builder object into +a :class:`~rfc3986.uri.URIReference` and call +:meth:`~rfc3986.uri.URIReference.unsplit`. + +.. doctest:: + + >>> from rfc3986 import builder + >>> print(builder.URIBuilder().add_scheme( + ... 'https' + ... ).add_host( + ... 'github.com' + ... ).finalize().unsplit()) + https://github.com + +Each time you invoke a method, you get a new instance of a +:class:`~rfc3986.builder.URIBuilder` class so you can build several different +URLs from one base instance. + +.. doctest:: + + >>> from rfc3986 import builder + >>> github_builder = builder.URIBuilder().add_scheme( + ... 'https' + ... ).add_host( + ... 'api.github.com' + ... ) + >>> print(github_builder.add_path( + ... '/users/sigmavirus24' + ... ).finalize().unsplit()) + https://api.github.com/users/sigmavirus24 + >>> print(github_builder.add_path( + ... '/repos/sigmavirus24/rfc3986' + ... ).finalize().unsplit()) + https://api.github.com/repos/sigmavirus24/rfc3986 + +|rfc3986| makes adding authentication credentials convenient. It takes care of +making the credentials URL safe. There are some characters someone might want +to include in a URL that are not safe for the authority component of a URL. + +.. doctest:: + + >>> from rfc3986 import builder + >>> print(builder.URIBuilder().add_scheme( + ... 'https' + ... ).add_host( + ... 'api.github.com' + ... ).add_credentials( + ... username='us3r', + ... password='p@ssw0rd', + ... ).finalize().unsplit()) + https://us3r:p%40ssw0rd@api.github.com + +Further, |rfc3986| attempts to simplify the process of adding query parameters +to a URL. For example, if we were using Elasticsearch, we might do something +like: + +.. doctest:: + + >>> from rfc3986 import builder + >>> print(builder.URIBuilder().add_scheme( + ... 'https' + ... ).add_host( + ... 'search.example.com' + ... ).add_path( + ... '_search' + ... ).add_query_from( + ... [('q', 'repo:sigmavirus24/rfc3986'), ('sort', 'created_at:asc')] + ... 
).finalize().unsplit()) + https://search.example.com/_search?q=repo%3Asigmavirus24%2Frfc3986&sort=created_at%3Aasc + +Finally, we provide a way to add a fragment to a URL. Let's build up a URL to +view the section of the RFC that refers to fragments: + +.. doctest:: + + >>> from rfc3986 import builder + >>> print(builder.URIBuilder().add_scheme( + ... 'https' + ... ).add_host( + ... 'tools.ietf.org' + ... ).add_path( + ... '/html/rfc3986' + ... ).add_fragment( + ... 'section-3.5' + ... ).finalize().unsplit()) + https://tools.ietf.org/html/rfc3986#section-3.5 diff --git a/docs/source/user/parsing.rst b/docs/source/user/parsing.rst new file mode 100644 index 0000000..5ec39cf --- /dev/null +++ b/docs/source/user/parsing.rst @@ -0,0 +1,147 @@ +=============== + Parsing a URI +=============== + +There are two ways to parse a URI with |rfc3986| + +#. :meth:`rfc3986.api.uri_reference` + + This is best when you're **not** replacing existing usage of + :mod:`urllib.parse`. This also provides convenience methods around safely + normalizing URIs passed into it. + +#. :meth:`rfc3986.api.urlparse` + + This is best suited to completely replace :func:`urllib.parse.urlparse`. + It returns a class that should be indistinguishable from + :class:`urllib.parse.ParseResult` + +Let's look at some code samples. + + +Some Examples +============= + +First we'll parse the URL that points to the repository for this project. + +.. testsetup:: * + + import rfc3986 + url = rfc3986.urlparse('https://github.com/sigmavirus24/rfc3986') + uri = rfc3986.uri_reference('https://github.com/sigmavirus24/rfc3986') + +.. code-block:: python + + url = rfc3986.urlparse('https://github.com/sigmavirus24/rfc3986') + + +Then we'll replace parts of that URL with new values: + +.. testcode:: ex0 + + print(url.copy_with( + userinfo='username:password', + port='443', + ).unsplit()) + +.. 
testoutput:: ex0 + + https://username:password@github.com:443/sigmavirus24/rfc3986 + +This, however, does not change the current ``url`` instance of +:class:`~rfc3986.parseresult.ParseResult`. As the method name might suggest, +we're copying that instance and then overriding certain attributes. +In fact, we can make as many copies as we like and nothing will change. + +.. testcode:: ex1 + + print(url.copy_with( + scheme='ssh', + userinfo='git', + ).unsplit()) + +.. testoutput:: ex1 + + ssh://git@github.com/sigmavirus24/rfc3986 + +.. testcode:: ex1 + + print(url.scheme) + +.. testoutput:: ex1 + + https + +We can do similar things with URI References as well. + +.. code-block:: python + + uri = rfc3986.uri_reference('https://github.com/sigmavirus24/rfc3986') + +.. testcode:: ex2 + + print(uri.copy_with( + authority='username:password@github.com:443', + path='/sigmavirus24/github3.py', + ).unsplit()) + +.. testoutput:: ex2 + + https://username:password@github.com:443/sigmavirus24/github3.py + +However, URI References may have some unexpected behaviour based strictly on +the RFC. + +Finally, if you want to remove a component from a URI, you may pass ``None`` +to remove it, for example: + +.. testcode:: ex3 + + print(uri.copy_with(path=None).unsplit()) + +.. testoutput:: ex3 + + https://github.com + +This will work on both URI References and Parse Results. + + +And Now For Something Slightly Unusual +====================================== + +If you are familiar with GitHub, GitLab, or a similar service, you may have +interacted with the "SSH URL" for some projects. For this project, +the SSH URL is: + +.. code:: + + git@github.com:sigmavirus24/rfc3986 + + +Let's see what happens when we parse this. + +.. 
code-block:: pycon + + >>> rfc3986.uri_reference('git@github.com:sigmavirus24/rfc3986') + URIReference(scheme=None, authority=None, + path=u'git@github.com:sigmavirus24/rfc3986', query=None, fragment=None) + +There's no scheme present, but it is apparent to our (human) eyes that +``git@github.com`` should not be part of the path. This is one of the areas +where :mod:`rfc3986` suffers slightly due to its strict conformance to +:rfc:`3986`. In the RFC, an authority must be preceded by ``//``. Let's see +what happens when we add that to our URI: + +.. code-block:: pycon + + >>> rfc3986.uri_reference('//git@github.com:sigmavirus24/rfc3986') + URIReference(scheme=None, authority=u'git@github.com:sigmavirus24', + path=u'/rfc3986', query=None, fragment=None) + +Somewhat better, but not much. + +.. note:: + + The maintainers of :mod:`rfc3986` are working to discern better ways to + parse these less common URIs in a reasonable and sensible way without + losing conformance to the RFC. diff --git a/docs/source/user/validating.rst b/docs/source/user/validating.rst new file mode 100644 index 0000000..26a0444 --- /dev/null +++ b/docs/source/user/validating.rst @@ -0,0 +1,140 @@ +================= + Validating URIs +================= + +While not as difficult as `validating an email address`_, validating URIs is +tricky. Different parts of the URI allow different characters. Those sets +sometimes overlap and other times they don't and it's not very convenient. +Luckily, |rfc3986| makes validating URIs far simpler. + +Example Usage +============= + +First we need to create an instance of a +:class:`~rfc3986.validators.Validator` which takes no parameters. After that +we can call methods on the instance to indicate what we want to validate. + +Let's assume that we're building something that takes user input for a URL and +we want to ensure that URL is only ever using a specific domain with https. In +that case, our code would look like this: + +.. 
doctest:: + + >>> from rfc3986 import validators, uri_reference + >>> user_url = 'https://github.com/sigmavirus24/rfc3986' + >>> validator = validators.Validator().allow_schemes( + ... 'https', + ... ).allow_hosts( + ... 'github.com', + ... ) + >>> validator.validate(uri_reference( + ... 'https://github.com/sigmavirus24/rfc3986' + ... )) + >>> validator.validate(uri_reference( + ... 'https://github.com/' + ... )) + >>> validator.validate(uri_reference( + ... 'http://example.com' + ... )) + Traceback (most recent call last): + ... + rfc3986.exceptions.UnpermittedComponentError + +First notice that we can easily reuse our validator object for each URL. +This allows users to not have to constantly reconstruct Validators for each +bit of user input. Next, we have three different URLs that we validate: + +#. ``https://github.com/sigmavirus24/rfc3986`` +#. ``https://github.com/`` +#. ``http://example.com`` + +As it stands, our validator will allow the first two URLs to pass but will +fail the third. This is specifically because we only allow URLs using +``https`` as a scheme and ``github.com`` as the domain name. + +Next, let's imagine that we want to prevent leaking user credentials. In that +case, we want to ensure that there is no password in the user information +portion of the authority. In that case, our new validator would look like this: + +.. doctest:: + + >>> from rfc3986 import validators, uri_reference + >>> user_url = 'https://github.com/sigmavirus24/rfc3986' + >>> validator = validators.Validator().allow_schemes( + ... 'https', + ... ).allow_hosts( + ... 'github.com', + ... ).forbid_use_of_password() + >>> validator.validate(uri_reference( + ... 'https://github.com/sigmavirus24/rfc3986' + ... )) + >>> validator.validate(uri_reference( + ... 'https://github.com/' + ... )) + >>> validator.validate(uri_reference( + ... 'http://example.com' + ... )) + Traceback (most recent call last): + ... 
+ rfc3986.exceptions.UnpermittedComponentError + >>> validator.validate(uri_reference( + ... 'https://sigmavirus24@github.com' + ... )) + >>> validator.validate(uri_reference( + ... 'https://sigmavirus24:not-my-real-password@github.com' + ... )) + Traceback (most recent call last): + ... + rfc3986.exceptions.PasswordForbidden + +Up until now, we have assumed that we will get a URL that has the appropriate +components for validation. For example, we assume that we will have a URL that +has a scheme and hostname. However, our current validation doesn't require +those items exist. + +.. doctest:: + + >>> from rfc3986 import validators, uri_reference + >>> user_url = 'https://github.com/sigmavirus24/rfc3986' + >>> validator = validators.Validator().allow_schemes( + ... 'https', + ... ).allow_hosts( + ... 'github.com', + ... ).forbid_use_of_password() + >>> validator.validate(uri_reference('//github.com')) + >>> validator.validate(uri_reference('https:/')) + +In the first case, we have a host name but no scheme and in the second we have +a scheme and a path but no host. If we want to ensure that those components +are there and that they are *always* what we allow, then we must add one last +item to our validator: + +.. doctest:: + + >>> from rfc3986 import validators, uri_reference + >>> user_url = 'https://github.com/sigmavirus24/rfc3986' + >>> validator = validators.Validator().allow_schemes( + ... 'https', + ... ).allow_hosts( + ... 'github.com', + ... ).forbid_use_of_password( + ... ).require_presence_of( + ... 'scheme', 'host', + ... ) + >>> validator.validate(uri_reference('//github.com')) + Traceback (most recent call last): + ... + rfc3986.exceptions.MissingComponentError + >>> validator.validate(uri_reference('https:/')) + Traceback (most recent call last): + ... + rfc3986.exceptions.MissingComponentError + >>> validator.validate(uri_reference('https://github.com')) + >>> validator.validate(uri_reference( + ... 'https://github.com/sigmavirus24/rfc3986' + ... 
)) + +.. links + +.. _validating an email address: + http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx/ diff --git a/rfc3986/exceptions.py b/rfc3986/exceptions.py deleted file mode 100644 index f9adbde..0000000 --- a/rfc3986/exceptions.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- -class RFC3986Exception(Exception): - pass - - -class InvalidAuthority(RFC3986Exception): - def __init__(self, authority): - super(InvalidAuthority, self).__init__( - "The authority ({0}) is not valid.".format(authority)) - - -class InvalidPort(RFC3986Exception): - def __init__(self, port): - super(InvalidPort, self).__init__( - 'The port ("{0}") is not valid.'.format(port)) - - -class ResolutionError(RFC3986Exception): - def __init__(self, uri): - super(ResolutionError, self).__init__( - "{0} is not an absolute URI.".format(uri.unsplit())) diff --git a/rfc3986/misc.py b/rfc3986/misc.py deleted file mode 100644 index bb2ed82..0000000 --- a/rfc3986/misc.py +++ /dev/null @@ -1,219 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2014 Rackspace -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -rfc3986.misc -~~~~~~~~~~~~ - -This module contains important constants, patterns, and compiled regular -expressions for parsing and validating URIs and their components. 
-""" - -import re - -# These are enumerated for the named tuple used as a superclass of -# URIReference -URI_COMPONENTS = ['scheme', 'authority', 'path', 'query', 'fragment'] - -important_characters = { - 'generic_delimiters': ":/?#[]@", - 'sub_delimiters': "!$&'()*+,;=", - # We need to escape the '*' in this case - 're_sub_delimiters': "!$&'()\*+,;=", - 'unreserved_chars': ('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' - '0123456789._~-'), - # We need to escape the '-' in this case: - 're_unreserved': 'A-Za-z0-9._~\-', - } -# For details about delimiters and reserved characters, see: -# http://tools.ietf.org/html/rfc3986#section-2.2 -GENERIC_DELIMITERS = set(important_characters['generic_delimiters']) -SUB_DELIMITERS = set(important_characters['sub_delimiters']) -RESERVED_CHARS = GENERIC_DELIMITERS.union(SUB_DELIMITERS) -# For details about unreserved characters, see: -# http://tools.ietf.org/html/rfc3986#section-2.3 -UNRESERVED_CHARS = set(important_characters['unreserved_chars']) -NON_PCT_ENCODED = RESERVED_CHARS.union(UNRESERVED_CHARS).union('%') - -# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B -component_pattern_dict = { - # NOTE(sigmavirus24): Our list of things we want to not match includes one - # item more than the RFC. We want to not parse the leading '[' from an - # IPv6 address into the scheme when provided something akin to: - # >>> rfc3986.uri_reference('[::1]') - # We would rather that appear to be a path than not. - 'scheme': '[^:/?#[]+', - 'authority': '[^/?#]*', - 'path': '[^?#]*', - 'query': '[^#]*', - 'fragment': '.*', - } - -# See http://tools.ietf.org/html/rfc3986#appendix-B -# In this case, we name each of the important matches so we can use -# SRE_Match#groupdict to parse the values out if we so choose. This is also -# modified to ignore other matches that are not important to the parsing of -# the reference so we can also simply use SRE_Match#groups. -expression = ('(?:(?P{scheme}):)?(?://(?P{authority}))?' 
- '(?P{path})(?:\?(?P{query}))?' - '(?:#(?P{fragment}))?' - ).format(**component_pattern_dict) - -URI_MATCHER = re.compile(expression) - -# ######################### -# Authority Matcher Section -# ######################### - -# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2 -# The pattern for a regular name, e.g., www.google.com, api.github.com -reg_name = '(({0})*|[{1}]*)'.format( - '%[0-9A-Fa-f]{2}', - important_characters['re_sub_delimiters'] + - important_characters['re_unreserved'] - ) -# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1, -ipv4 = '([0-9]{1,3}.){3}[0-9]{1,3}' -# Hexadecimal characters used in each piece of an IPv6 address -hexdig = '[0-9A-Fa-f]{1,4}' -# Least-significant 32 bits of an IPv6 address -ls32 = '({hex}:{hex}|{ipv4})'.format(hex=hexdig, ipv4=ipv4) -# Substitutions into the following patterns for IPv6 patterns defined -# http://tools.ietf.org/html/rfc3986#page-20 -subs = {'hex': hexdig, 'ls32': ls32} - -# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details -# about ABNF (Augmented Backus-Naur Form) use in the comments -variations = [ - # 6( h16 ":" ) ls32 - '(%(hex)s:){6}%(ls32)s' % subs, - # "::" 5( h16 ":" ) ls32 - '::(%(hex)s:){5}%(ls32)s' % subs, - # [ h16 ] "::" 4( h16 ":" ) ls32 - '(%(hex)s)?::(%(hex)s:){4}%(ls32)s' % subs, - # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 - '((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s' % subs, - # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 - '((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s' % subs, - # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 - '((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s' % subs, - # [ *4( h16 ":" ) h16 ] "::" ls32 - '((%(hex)s:){0,4}%(hex)s)?::%(ls32)s' % subs, - # [ *5( h16 ":" ) h16 ] "::" h16 - '((%(hex)s:){0,5}%(hex)s)?::%(hex)s' % subs, - # [ *6( h16 ":" ) h16 ] "::" - '((%(hex)s:){0,6}%(hex)s)?::' % subs, - ] - -ipv6 = '(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7}))'.format(*variations) - -ipv_future = 
'v[0-9A-Fa-f]+.[%s]+' % ( - important_characters['re_unreserved'] + - important_characters['re_sub_delimiters'] + - ':') - -ip_literal = '\[({0}|{1})\]'.format(ipv6, ipv_future) - -# Pattern for matching the host piece of the authority -HOST_PATTERN = '({0}|{1}|{2})'.format(reg_name, ipv4, ip_literal) - -SUBAUTHORITY_MATCHER = re.compile(( - '^(?:(?P[A-Za-z0-9_.~\-%:]+)@)?' # userinfo - '(?P{0}?)' # host - ':?(?P[0-9]+)?$' # port - ).format(HOST_PATTERN)) - -IPv4_MATCHER = re.compile('^' + ipv4 + '$') - - -# #################### -# Path Matcher Section -# #################### - -# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information -# about the path patterns defined below. - -# Percent encoded character values -pct_encoded = '%[A-Fa-f0-9]{2}' -pchar = ('([' + important_characters['re_unreserved'] - + important_characters['re_sub_delimiters'] - + ':@]|%s)' % pct_encoded) -segments = { - 'segment': pchar + '*', - # Non-zero length segment - 'segment-nz': pchar + '+', - # Non-zero length segment without ":" - 'segment-nz-nc': pchar.replace(':', '') + '+' - } - -# Path types taken from Section 3.3 (linked above) -path_empty = '^$' -path_rootless = '%(segment-nz)s(/%(segment)s)*' % segments -path_noscheme = '%(segment-nz-nc)s(/%(segment)s)*' % segments -path_absolute = '/(%s)?' 
% path_rootless -path_abempty = '(/%(segment)s)*' % segments - -# Matcher used to validate path components -PATH_MATCHER = re.compile('^(%s|%s|%s|%s|%s)$' % ( - path_abempty, path_absolute, path_noscheme, path_rootless, path_empty - )) - - -# ################################## -# Query and Fragment Matcher Section -# ################################## - -QUERY_MATCHER = re.compile( - '^([/?:@' + important_characters['re_unreserved'] - + important_characters['re_sub_delimiters'] - + ']|%s)*$' % pct_encoded) - -FRAGMENT_MATCHER = QUERY_MATCHER - -# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1 -SCHEME_MATCHER = re.compile('^[A-Za-z][A-Za-z0-9+.\-]*$') - -# Relative reference matcher - -# See http://tools.ietf.org/html/rfc3986#section-4.2 for details -relative_part = '(//%s%s|%s|%s|%s)' % ( - component_pattern_dict['authority'], path_abempty, path_absolute, - path_noscheme, path_empty - ) - -RELATIVE_REF_MATCHER = re.compile('^%s(\?%s)?(#%s)?$' % ( - relative_part, QUERY_MATCHER.pattern, FRAGMENT_MATCHER.pattern - )) - -# See http://tools.ietf.org/html/rfc3986#section-3 for definition -hier_part = '(//%s%s|%s|%s|%s)' % ( - component_pattern_dict['authority'], path_abempty, path_absolute, - path_rootless, path_empty - ) - -# See http://tools.ietf.org/html/rfc3986#section-4.3 -ABSOLUTE_URI_MATCHER = re.compile('^%s:%s(\?%s)?$' % ( - component_pattern_dict['scheme'], hier_part, QUERY_MATCHER.pattern[1:-1] - )) - - -# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3 -def merge_paths(base_uri, relative_path): - """Merge a base URI's path with a relative URI's path.""" - if base_uri.path is None and base_uri.authority is not None: - return '/' + relative_path - else: - path = base_uri.path or '' - index = path.rfind('/') - return path[:index] + '/' + relative_path diff --git a/setup.py b/setup.py index 10ce851..0a05aeb 100755 --- a/setup.py +++ b/setup.py @@ -1,20 +1,14 @@ -#!/usr/bin/env python - +"""Packaging logic for 
the rfc3986 library.""" import io import os import sys +import setuptools + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) # noqa + import rfc3986 -try: - from setuptools import setup -except ImportError: - from distutils.core import setup - -if sys.argv[-1] == 'publish': - os.system('python setup.py bdist_wheel sdist upload') - sys.exit() - packages = [ 'rfc3986', ] @@ -22,18 +16,16 @@ packages = [ with io.open('README.rst', encoding='utf-8') as f: readme = f.read() -with io.open('HISTORY.rst', encoding='utf-8') as f: - history = f.read() - -setup( +setuptools.setup( name='rfc3986', version=rfc3986.__version__, description='Validating URI References per RFC 3986', - long_description=readme + '\n\n' + history, + long_description=readme, author='Ian Cordasco', - author_email='ian.cordasco@rackspace.com', - url='https://rfc3986.readthedocs.org', + author_email='graffatcolmingov@gmail.com', + url='http://rfc3986.readthedocs.io', packages=packages, + package_dir={'': 'src/'}, package_data={'': ['LICENSE']}, include_package_data=True, license='Apache 2.0', @@ -43,10 +35,11 @@ setup( 'Natural Language :: English', 'License :: OSI Approved :: Apache Software License', 'Programming Language :: Python', - 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', ), ) diff --git a/rfc3986/__init__.py b/src/rfc3986/__init__.py similarity index 83% rename from rfc3986/__init__.py rename to src/rfc3986/__init__.py index e32520e..81bb9c4 100644 --- a/rfc3986/__init__.py +++ b/src/rfc3986/__init__.py @@ -14,18 +14,19 @@ # limitations under the License. """ -rfc3986 -======= +An implementation of semantics and validations described in RFC 3986. -An implementation of semantics and validations described in RFC 3986. 
See -http://rfc3986.rtfd.org/ for documentation. +See http://rfc3986.readthedocs.io/ for detailed documentation. :copyright: (c) 2014 Rackspace :license: Apache v2.0, see LICENSE for details """ -from .api import (URIReference, uri_reference, is_valid_uri, normalize_uri, - urlparse) +from .api import is_valid_uri +from .api import normalize_uri +from .api import uri_reference +from .api import URIReference +from .api import urlparse from .parseresult import ParseResult __title__ = 'rfc3986' @@ -33,7 +34,7 @@ __author__ = 'Ian Cordasco' __author_email__ = 'graffatcolmingov@gmail.com' __license__ = 'Apache v2.0' __copyright__ = 'Copyright 2014 Rackspace' -__version__ = '0.4.1' +__version__ = '1.0.0' __all__ = ( 'ParseResult', diff --git a/src/rfc3986/abnf_regexp.py b/src/rfc3986/abnf_regexp.py new file mode 100644 index 0000000..90bd1e4 --- /dev/null +++ b/src/rfc3986/abnf_regexp.py @@ -0,0 +1,186 @@ +# -*- coding: utf-8 -*- +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Module for the regular expressions crafted from ABNF.""" + +# https://tools.ietf.org/html/rfc3986#page-13 +GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@" +GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS) +# https://tools.ietf.org/html/rfc3986#page-13 +SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;=" +SUB_DELIMITERS_SET = set(SUB_DELIMITERS) +# Escape the '*' for use in regular expressions +SUB_DELIMITERS_RE = "!$&'()\*+,;=" +RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET) +ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' +DIGIT = '0123456789' +# https://tools.ietf.org/html/rfc3986#section-2.3 +UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + '._!-' +UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS) +NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET).union('%') +# We need to escape the '-' in this case: +UNRESERVED_RE = 'A-Za-z0-9._~\-' + +# Percent encoded character values +PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}' +PCHAR = '([' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':@]|%s)' % PCT_ENCODED + +# NOTE(sigmavirus24): We're going to use more strict regular expressions +# than appear in Appendix B for scheme. This will prevent over-eager +# consuming of items that aren't schemes. +SCHEME_RE = '[a-zA-Z][a-zA-Z0-9+.-]*' +_AUTHORITY_RE = '[^/?#]*' +_PATH_RE = '[^?#]*' +_QUERY_RE = '[^#]*' +_FRAGMENT_RE = '.*' + +# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B +COMPONENT_PATTERN_DICT = { + 'scheme': SCHEME_RE, + 'authority': _AUTHORITY_RE, + 'path': _PATH_RE, + 'query': _QUERY_RE, + 'fragment': _FRAGMENT_RE, +} + +# See http://tools.ietf.org/html/rfc3986#appendix-B +# In this case, we name each of the important matches so we can use +# SRE_Match#groupdict to parse the values out if we so choose. This is also +# modified to ignore other matches that are not important to the parsing of +# the reference so we can also simply use SRE_Match#groups. +URL_PARSING_RE = ( + '(?:(?P{scheme}):)?(?://(?P{authority}))?' 
+ '(?P{path})(?:\?(?P{query}))?' + '(?:#(?P{fragment}))?' +).format(**COMPONENT_PATTERN_DICT) + + +# ######################### +# Authority Matcher Section +# ######################### + +# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2 +# The pattern for a regular name, e.g., www.google.com, api.github.com +REGULAR_NAME_RE = REG_NAME = '(({0})*|[{1}]*)'.format( + '%[0-9A-Fa-f]{2}', SUB_DELIMITERS_RE + UNRESERVED_RE +) +# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1, +IPv4_RE = '([0-9]{1,3}.){3}[0-9]{1,3}' +# Hexadecimal characters used in each piece of an IPv6 address +HEXDIG_RE = '[0-9A-Fa-f]{1,4}' +# Least-significant 32 bits of an IPv6 address +LS32_RE = '({hex}:{hex}|{ipv4})'.format(hex=HEXDIG_RE, ipv4=IPv4_RE) +# Substitutions into the following patterns for IPv6 patterns defined +# http://tools.ietf.org/html/rfc3986#page-20 +_subs = {'hex': HEXDIG_RE, 'ls32': LS32_RE} + +# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details +# about ABNF (Augmented Backus-Naur Form) use in the comments +variations = [ + # 6( h16 ":" ) ls32 + '(%(hex)s:){6}%(ls32)s' % _subs, + # "::" 5( h16 ":" ) ls32 + '::(%(hex)s:){5}%(ls32)s' % _subs, + # [ h16 ] "::" 4( h16 ":" ) ls32 + '(%(hex)s)?::(%(hex)s:){4}%(ls32)s' % _subs, + # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + '((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s' % _subs, + # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + '((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s' % _subs, + # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + '((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s' % _subs, + # [ *4( h16 ":" ) h16 ] "::" ls32 + '((%(hex)s:){0,4}%(hex)s)?::%(ls32)s' % _subs, + # [ *5( h16 ":" ) h16 ] "::" h16 + '((%(hex)s:){0,5}%(hex)s)?::%(hex)s' % _subs, + # [ *6( h16 ":" ) h16 ] "::" + '((%(hex)s:){0,6}%(hex)s)?::' % _subs, +] + +IPv6_RE = '(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7}))'.format( + *variations +) + +IPv_FUTURE_RE = 'v[0-9A-Fa-f]+.[%s]+' % ( + 
UNRESERVED_RE + SUB_DELIMITERS_RE + ':' +) + + +# RFC 6874 Zone ID ABNF +ZONE_ID = '(?:[' + UNRESERVED_RE + ']|' + PCT_ENCODED + ')+' +IPv6_ADDRZ_RE = IPv6_RE + '%25' + ZONE_ID + +IP_LITERAL_RE = '\[({0}|(?:{1})|{2})\]'.format( + IPv6_RE, + IPv6_ADDRZ_RE, + IPv_FUTURE_RE, +) + +# Pattern for matching the host piece of the authority +HOST_RE = HOST_PATTERN = '({0}|{1}|{2})'.format( + REG_NAME, + IPv4_RE, + IP_LITERAL_RE, +) +USERINFO_RE = '^[A-Za-z0-9_.~\-%:]+' +PORT_RE = '[0-9]{1,5}' + +# #################### +# Path Matcher Section +# #################### + +# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information +# about the path patterns defined below. +segments = { + 'segment': PCHAR + '*', + # Non-zero length segment + 'segment-nz': PCHAR + '+', + # Non-zero length segment without ":" + 'segment-nz-nc': PCHAR.replace(':', '') + '+' +} + +# Path types taken from Section 3.3 (linked above) +PATH_EMPTY = '^$' +PATH_ROOTLESS = '%(segment-nz)s(/%(segment)s)*' % segments +PATH_NOSCHEME = '%(segment-nz-nc)s(/%(segment)s)*' % segments +PATH_ABSOLUTE = '/(%s)?' 
% PATH_ROOTLESS +PATH_ABEMPTY = '(/%(segment)s)*' % segments +PATH_RE = '^(%s|%s|%s|%s|%s)$' % ( + PATH_ABEMPTY, PATH_ABSOLUTE, PATH_NOSCHEME, PATH_ROOTLESS, PATH_EMPTY +) + +FRAGMENT_RE = QUERY_RE = ( + '^([/?:@' + UNRESERVED_RE + SUB_DELIMITERS_RE + ']|%s)*$' % PCT_ENCODED +) + +# ########################## +# Relative reference matcher +# ########################## + +# See http://tools.ietf.org/html/rfc3986#section-4.2 for details +RELATIVE_PART_RE = '(//%s%s|%s|%s|%s)' % ( + COMPONENT_PATTERN_DICT['authority'], + PATH_ABEMPTY, + PATH_ABSOLUTE, + PATH_NOSCHEME, + PATH_EMPTY, +) + +# See http://tools.ietf.org/html/rfc3986#section-3 for definition +HIER_PART_RE = '(//%s%s|%s|%s|%s)' % ( + COMPONENT_PATTERN_DICT['authority'], + PATH_ABEMPTY, + PATH_ABSOLUTE, + PATH_ROOTLESS, + PATH_EMPTY, +) diff --git a/rfc3986/api.py b/src/rfc3986/api.py similarity index 95% rename from rfc3986/api.py rename to src/rfc3986/api.py index 3e9e401..17f4daf 100644 --- a/rfc3986/api.py +++ b/src/rfc3986/api.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -rfc3986.api -~~~~~~~~~~~ +Module containing the simple and functional API for rfc3986. -This defines the simple API to rfc3986. This module defines 3 functions and -provides access to the class ``URIReference``. +This module defines functions and provides access to the public attributes +and classes of rfc3986. """ -from .uri import URIReference from .parseresult import ParseResult +from .uri import URIReference def uri_reference(uri, encoding='utf-8'): diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py new file mode 100644 index 0000000..2eb9ab4 --- /dev/null +++ b/src/rfc3986/builder.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2017 Ian Cordasco +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing the logic for the URIBuilder object.""" +from . import compat +from . import normalizers +from . import uri + + +class URIBuilder(object): + """Object to aid in building up a URI Reference from parts. + + .. note:: + + This object should be instantiated by the user, but it's recommended + that it is not provided with arguments. Instead, use the available + method to populate the fields. + + """ + + def __init__(self, scheme=None, userinfo=None, host=None, port=None, + path=None, query=None, fragment=None): + """Initialize our URI builder. + + :param str scheme: + (optional) + :param str userinfo: + (optional) + :param str host: + (optional) + :param int port: + (optional) + :param str path: + (optional) + :param str query: + (optional) + :param str fragment: + (optional) + """ + self.scheme = scheme + self.userinfo = userinfo + self.host = host + self.port = port + self.path = path + self.query = query + self.fragment = fragment + + def __repr__(self): + """Provide a convenient view of our builder object.""" + formatstr = ('URIBuilder(scheme={b.scheme}, userinfo={b.userinfo}, ' + 'host={b.host}, port={b.port}, path={b.path}, ' + 'query={b.query}, fragment={b.fragment})') + return formatstr.format(b=self) + + def add_scheme(self, scheme): + """Add a scheme to our builder object. + + After normalizing, this will generate a new URIBuilder instance with + the specified scheme and all other attributes the same. + + .. 
code-block:: python + + >>> URIBuilder().add_scheme('HTTPS') + URIBuilder(scheme='https', userinfo=None, host=None, port=None, + path=None, query=None, fragment=None) + + """ + scheme = normalizers.normalize_scheme(scheme) + return URIBuilder( + scheme=scheme, + userinfo=self.userinfo, + host=self.host, + port=self.port, + path=self.path, + query=self.query, + fragment=self.fragment, + ) + + def add_credentials(self, username, password): + """Add credentials as the userinfo portion of the URI. + + .. code-block:: python + + >>> URIBuilder().add_credentials('root', 's3crete') + URIBuilder(scheme=None, userinfo='root:s3crete', host=None, + port=None, path=None, query=None, fragment=None) + + >>> URIBuilder().add_credentials('root', None) + URIBuilder(scheme=None, userinfo='root', host=None, + port=None, path=None, query=None, fragment=None) + """ + if username is None: + raise ValueError('Username cannot be None') + userinfo = normalizers.normalize_username(username) + + if password is not None: + userinfo = '{}:{}'.format( + userinfo, + normalizers.normalize_password(password), + ) + + return URIBuilder( + scheme=self.scheme, + userinfo=userinfo, + host=self.host, + port=self.port, + path=self.path, + query=self.query, + fragment=self.fragment, + ) + + def add_host(self, host): + """Add hostname to the URI. + + .. code-block:: python + + >>> URIBuilder().add_host('google.com') + URIBuilder(scheme=None, userinfo=None, host='google.com', + port=None, path=None, query=None, fragment=None) + + """ + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=normalizers.normalize_host(host), + port=self.port, + path=self.path, + query=self.query, + fragment=self.fragment, + ) + + def add_port(self, port): + """Add port to the URI. + + .. 
code-block:: python + + >>> URIBuilder().add_port(80) + URIBuilder(scheme=None, userinfo=None, host=None, port='80', + path=None, query=None, fragment=None) + + >>> URIBuilder().add_port(443) + URIBuilder(scheme=None, userinfo=None, host=None, port='443', + path=None, query=None, fragment=None) + + """ + port_int = int(port) + if port_int < 0: + raise ValueError( + 'ports are not allowed to be negative. You provided {}'.format( + port_int, + ) + ) + if port_int > 65535: + raise ValueError( + 'ports are not allowed to be larger than 65535. ' + 'You provided {}'.format( + port_int, + ) + ) + + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=self.host, + port='{}'.format(port_int), + path=self.path, + query=self.query, + fragment=self.fragment, + ) + + def add_path(self, path): + """Add a path to the URI. + + .. code-block:: python + + >>> URIBuilder().add_path('sigmavirus24/rfc3985') + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path='/sigmavirus24/rfc3986', query=None, fragment=None) + + >>> URIBuilder().add_path('/checkout.php') + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path='/checkout.php', query=None, fragment=None) + + """ + if not path.startswith('/'): + path = '/{}'.format(path) + + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=self.host, + port=self.port, + path=normalizers.normalize_path(path), + query=self.query, + fragment=self.fragment, + ) + + def add_query_from(self, query_items): + """Generate and add a query a dictionary or list of tuples. + + .. 
code-block:: python + + >>> URIBuilder().add_query_from({'a': 'b c'}) + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path=None, query='a=b+c', fragment=None) + + >>> URIBuilder().add_query_from([('a', 'b c')]) + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path=None, query='a=b+c', fragment=None) + + """ + query = normalizers.normalize_query(compat.urlencode(query_items)) + + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=self.host, + port=self.port, + path=self.path, + query=query, + fragment=self.fragment, + ) + + def add_query(self, query): + """Add a pre-formated query string to the URI. + + .. code-block:: python + + >>> URIBuilder().add_query('a=b&c=d') + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path=None, query='a=b&c=d', fragment=None) + + """ + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=self.host, + port=self.port, + path=self.path, + query=normalizers.normalize_query(query), + fragment=self.fragment, + ) + + def add_fragment(self, fragment): + """Add a fragment to the URI. + + .. code-block:: python + + >>> URIBuilder().add_fragment('section-2.6.1') + URIBuilder(scheme=None, userinfo=None, host=None, port=None, + path=None, query=None, fragment='section-2.6.1') + + """ + return URIBuilder( + scheme=self.scheme, + userinfo=self.userinfo, + host=self.host, + port=self.port, + path=self.path, + query=self.query, + fragment=normalizers.normalize_fragment(fragment), + ) + + def finalize(self): + """Create a URIReference from our builder. + + .. code-block:: python + + >>> URIBuilder().add_scheme('https').add_host('github.com' + ... ).add_path('sigmavirus24/rfc3986').finalize().unsplit() + 'https://github.com/sigmavirus24/rfc3986' + + >>> URIBuilder().add_scheme('https').add_host('github.com' + ... ).add_path('sigmavirus24/rfc3986').add_credentials( + ... 
'sigmavirus24', 'not-re@l').finalize().unsplit() + 'https://sigmavirus24:not-re%40l@github.com/sigmavirus24/rfc3986' + + """ + return uri.URIReference( + self.scheme, + normalizers.normalize_authority( + (self.userinfo, self.host, self.port) + ), + self.path, + self.query, + self.fragment, + ) diff --git a/rfc3986/compat.py b/src/rfc3986/compat.py similarity index 60% rename from rfc3986/compat.py rename to src/rfc3986/compat.py index 6fc7f6d..8da7770 100644 --- a/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -12,20 +12,43 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Compatibility module for Python 2 and 3 support.""" import sys +try: + from urllib.parse import quote as urlquote +except ImportError: # Python 2.x + from urllib import quote as urlquote -if sys.version_info >= (3, 0): +try: + from urllib.parse import urlencode +except ImportError: # Python 2.x + from urllib import urlencode + +__all__ = ( + 'to_bytes', + 'to_str', + 'urlquote', + 'urlencode', +) + +PY3 = (3, 0) <= sys.version_info < (4, 0) +PY2 = (2, 6) <= sys.version_info < (2, 8) + + +if PY3: unicode = str # Python 3.x def to_str(b, encoding): + """Ensure that b is text in the specified encoding.""" if hasattr(b, 'decode') and not isinstance(b, unicode): b = b.decode('utf-8') return b def to_bytes(s, encoding): + """Ensure that s is converted to bytes from the encoding.""" if hasattr(s, 'encode') and not isinstance(s, bytes): s = s.encode('utf-8') return s diff --git a/src/rfc3986/exceptions.py b/src/rfc3986/exceptions.py new file mode 100644 index 0000000..49e8d08 --- /dev/null +++ b/src/rfc3986/exceptions.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +"""Exceptions module for rfc3986.""" + + +class RFC3986Exception(Exception): + """Base class for all rfc3986 exception classes.""" + + pass + + +class InvalidAuthority(RFC3986Exception): + """Exception when the authority string is invalid.""" + + def __init__(self, 
authority): + """Initialize the exception with the invalid authority.""" + super(InvalidAuthority, self).__init__( + "The authority ({0}) is not valid.".format(authority)) + + +class InvalidPort(RFC3986Exception): + """Exception when the port is invalid.""" + + def __init__(self, port): + """Initialize the exception with the invalid port.""" + super(InvalidPort, self).__init__( + 'The port ("{0}") is not valid.'.format(port)) + + +class ResolutionError(RFC3986Exception): + """Exception to indicate a failure to resolve a URI.""" + + def __init__(self, uri): + """Initialize the error with the failed URI.""" + super(ResolutionError, self).__init__( + "{0} is not an absolute URI.".format(uri.unsplit())) + + +class ValidationError(RFC3986Exception): + """Exception raised during Validation of a URI.""" + + pass + + +class MissingComponentError(ValidationError): + """Exception raised when a required component is missing.""" + + def __init__(self, uri, *component_names): + """Initialize the error with the missing component name.""" + verb = 'was' + if len(component_names) > 1: + verb = 'were' + + self.uri = uri + self.components = sorted(component_names) + components = ', '.join(self.components) + super(MissingComponentError, self).__init__( + "{} {} required but missing".format(components, verb), + uri, + self.components, + ) + + +class UnpermittedComponentError(ValidationError): + """Exception raised when a component has an unpermitted value.""" + + def __init__(self, component_name, component_value, allowed_values): + """Initialize the error with the unpermitted component.""" + super(UnpermittedComponentError, self).__init__( + "{} was required to be one of {!r} but was {!r}".format( + component_name, list(sorted(allowed_values)), component_value, + ), + component_name, + component_value, + allowed_values, + ) + self.component_name = component_name + self.component_value = component_value + self.allowed_values = allowed_values + + +class 
PasswordForbidden(ValidationError): + """Exception raised when a URL has a password in the userinfo section.""" + + def __init__(self, uri): + """Initialize the error with the URI that failed validation.""" + unsplit = getattr(uri, 'unsplit', lambda: uri) + super(PasswordForbidden, self).__init__( + '"{}" contained a password when validation forbade it'.format( + unsplit() + ) + ) + self.uri = uri diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py new file mode 100644 index 0000000..9eaf064 --- /dev/null +++ b/src/rfc3986/misc.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2014 Rackspace +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Module containing compiled regular expressions and constants. + +This module contains important constants, patterns, and compiled regular +expressions for parsing and validating URIs and their components. +""" + +import re + +from . 
import abnf_regexp + +# These are enumerated for the named tuple used as a superclass of +# URIReference +URI_COMPONENTS = ['scheme', 'authority', 'path', 'query', 'fragment'] + +important_characters = { + 'generic_delimiters': abnf_regexp.GENERIC_DELIMITERS, + 'sub_delimiters': abnf_regexp.SUB_DELIMITERS, + # We need to escape the '*' in this case + 're_sub_delimiters': abnf_regexp.SUB_DELIMITERS_RE, + 'unreserved_chars': abnf_regexp.UNRESERVED_CHARS, + # We need to escape the '-' in this case: + 're_unreserved': abnf_regexp.UNRESERVED_RE, +} + +# For details about delimiters and reserved characters, see: +# http://tools.ietf.org/html/rfc3986#section-2.2 +GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET +SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET +RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET +# For details about unreserved characters, see: +# http://tools.ietf.org/html/rfc3986#section-2.3 +UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET +NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET + +URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE) + +SUBAUTHORITY_MATCHER = re.compile(( + '^(?:(?P{0})@)?' 
# userinfo + '(?P{1})' # host + ':?(?P{2})?$' # port + ).format(abnf_regexp.USERINFO_RE, + abnf_regexp.HOST_PATTERN, + abnf_regexp.PORT_RE)) + + +IPv4_MATCHER = re.compile('^' + abnf_regexp.IPv4_RE + '$') + +# Matcher used to validate path components +PATH_MATCHER = re.compile(abnf_regexp.PATH_RE) + + +# ################################## +# Query and Fragment Matcher Section +# ################################## + +QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE) + +FRAGMENT_MATCHER = QUERY_MATCHER + +# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1 +SCHEME_MATCHER = re.compile('^{0}$'.format(abnf_regexp.SCHEME_RE)) + +RELATIVE_REF_MATCHER = re.compile('^%s(\?%s)?(#%s)?$' % ( + abnf_regexp.RELATIVE_PART_RE, abnf_regexp.QUERY_RE, + abnf_regexp.FRAGMENT_RE, +)) + +# See http://tools.ietf.org/html/rfc3986#section-4.3 +ABSOLUTE_URI_MATCHER = re.compile('^%s:%s(\?%s)?$' % ( + abnf_regexp.COMPONENT_PATTERN_DICT['scheme'], + abnf_regexp.HIER_PART_RE, + abnf_regexp.QUERY_RE[1:-1], +)) + + +# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3 +def merge_paths(base_uri, relative_path): + """Merge a base URI's path with a relative URI's path.""" + if base_uri.path is None and base_uri.authority is not None: + return '/' + relative_path + else: + path = base_uri.path or '' + index = path.rfind('/') + return path[:index] + '/' + relative_path + + +UseExisting = object() diff --git a/rfc3986/normalizers.py b/src/rfc3986/normalizers.py similarity index 76% rename from rfc3986/normalizers.py rename to src/rfc3986/normalizers.py index 9e0812e..8e85886 100644 --- a/rfc3986/normalizers.py +++ b/src/rfc3986/normalizers.py @@ -12,29 +12,48 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Module with functions to normalize components.""" import re -from .compat import to_bytes -from .misc import NON_PCT_ENCODED +from . import compat +from . 
import misc def normalize_scheme(scheme): + """Normalize the scheme component.""" return scheme.lower() def normalize_authority(authority): + """Normalize an authority tuple to a string.""" userinfo, host, port = authority result = '' if userinfo: result += normalize_percent_characters(userinfo) + '@' if host: - result += host.lower() + result += normalize_host(host) if port: result += ':' + port return result +def normalize_username(username): + """Normalize a username to make it safe to include in userinfo.""" + return compat.urlquote(username) + + +def normalize_password(password): + """Normalize a password to make safe for userinfo.""" + return compat.urlquote(password) + + +def normalize_host(host): + """Normalize a host string.""" + return host.lower() + + def normalize_path(path): + """Normalize the path string.""" if not path: return path @@ -43,12 +62,14 @@ def normalize_path(path): def normalize_query(query): + """Normalize the query string.""" if not query: return query return normalize_percent_characters(query) def normalize_fragment(fragment): + """Normalize the fragment string.""" if not fragment: return fragment return normalize_percent_characters(fragment) @@ -70,6 +91,10 @@ def normalize_percent_characters(s): def remove_dot_segments(s): + """Remove dot segments from the string. + + See also Section 5.2.4 of :rfc:`3986`. 
+ """ # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code segments = s.split('/') # Turn the path into a list of segments output = [] # Initialize the variable to use to store output @@ -100,10 +125,11 @@ def remove_dot_segments(s): def encode_component(uri_component, encoding): + """Encode the specific component in the provided encoding.""" if uri_component is None: return uri_component - uri_bytes = to_bytes(uri_component, encoding) + uri_bytes = compat.to_bytes(uri_component, encoding) encoded_uri = bytearray() @@ -111,7 +137,7 @@ def encode_component(uri_component, encoding): # Will return a single character bytestring on both Python 2 & 3 byte = uri_bytes[i:i+1] byte_ord = ord(byte) - if byte_ord < 128 and byte.decode() in NON_PCT_ENCODED: + if byte_ord < 128 and byte.decode() in misc.NON_PCT_ENCODED: encoded_uri.extend(byte) continue encoded_uri.extend('%{0:02x}'.format(byte_ord).encode()) diff --git a/rfc3986/parseresult.py b/src/rfc3986/parseresult.py similarity index 89% rename from rfc3986/parseresult.py rename to src/rfc3986/parseresult.py index 689ab1c..4fdd50a 100644 --- a/rfc3986/parseresult.py +++ b/src/rfc3986/parseresult.py @@ -12,10 +12,12 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Module containing the urlparse compatibility logic.""" from collections import namedtuple from . import compat from . import exceptions +from . import misc from . import normalizers from . 
import uri @@ -44,31 +46,38 @@ class ParseResultMixin(object): return self.authority def geturl(self): - """Standard library shim to the unsplit method.""" + """Shim to match the standard library method.""" return self.unsplit() @property def hostname(self): - """Standard library shim for the host portion of the URI.""" + """Shim to match the standard library.""" return self.host @property def netloc(self): - """Standard library shim for the authority portion of the URI.""" + """Shim to match the standard library.""" return self.authority @property def params(self): - """Standard library shim for the query portion of the URI.""" + """Shim to match the standard library.""" return self.query class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), ParseResultMixin): + """Implementation of urlparse compatibility class. + + This uses the URIReference logic to handle compatibility with the + urlparse.ParseResult class. + """ + slots = () def __new__(cls, scheme, userinfo, host, port, path, query, fragment, uri_ref, encoding='utf-8'): + """Create a new ParseResult.""" parse_result = super(ParseResult, cls).__new__( cls, scheme or None, @@ -139,16 +148,19 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), @property def authority(self): - """Normalized authority generated from the subauthority parts.""" + """Return the normalized authority.""" return self.reference.authority - def copy_with(self, scheme=None, userinfo=None, host=None, port=None, - path=None, query=None, fragment=None): + def copy_with(self, scheme=misc.UseExisting, userinfo=misc.UseExisting, + host=misc.UseExisting, port=misc.UseExisting, + path=misc.UseExisting, query=misc.UseExisting, + fragment=misc.UseExisting): + """Create a copy of this instance replacing with specified parts.""" attributes = zip(PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment)) attrs_dict = {} for name, value in attributes: - if value is None: + if value is misc.UseExisting: value = 
getattr(self, name) attrs_dict[name] = value authority = self._generate_authority(attrs_dict) @@ -160,6 +172,7 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict) def encode(self, encoding=None): + """Convert to an instance of ParseResultBytes.""" encoding = encoding or self.encoding attrs = dict( zip(PARSED_COMPONENTS, @@ -187,8 +200,11 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS), class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS), ParseResultMixin): + """Compatibility shim for the urlparse.ParseResultBytes object.""" + def __new__(cls, scheme, userinfo, host, port, path, query, fragment, uri_ref, encoding='utf-8', lazy_normalize=True): + """Create a new ParseResultBytes instance.""" parse_result = super(ParseResultBytes, cls).__new__( cls, scheme or None, @@ -267,16 +283,19 @@ class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS), @property def authority(self): - """Normalized authority generated from the subauthority parts.""" + """Return the normalized authority.""" return self.reference.authority.encode(self.encoding) - def copy_with(self, scheme=None, userinfo=None, host=None, port=None, - path=None, query=None, fragment=None, lazy_normalize=True): + def copy_with(self, scheme=misc.UseExisting, userinfo=misc.UseExisting, + host=misc.UseExisting, port=misc.UseExisting, + path=misc.UseExisting, query=misc.UseExisting, + fragment=misc.UseExisting, lazy_normalize=True): + """Create a copy of this instance replacing with specified parts.""" attributes = zip(PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment)) attrs_dict = {} for name, value in attributes: - if value is None: + if value is misc.UseExisting: value = getattr(self, name) if not isinstance(value, bytes) and hasattr(value, 'encode'): value = value.encode(self.encoding) diff --git a/rfc3986/uri.py b/src/rfc3986/uri.py similarity index 63% 
rename from rfc3986/uri.py rename to src/rfc3986/uri.py index 2a86c6b..93694a5 100644 --- a/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -1,3 +1,4 @@ +"""Module containing the implementation of the URIReference class.""" # -*- coding: utf-8 -*- # Copyright (c) 2014 Rackspace # Copyright (c) 2015 Ian Cordasco @@ -15,24 +16,76 @@ # limitations under the License. from collections import namedtuple -from .compat import to_str -from .exceptions import InvalidAuthority, ResolutionError -from .misc import ( - ABSOLUTE_URI_MATCHER, FRAGMENT_MATCHER, IPv4_MATCHER, PATH_MATCHER, - QUERY_MATCHER, SCHEME_MATCHER, SUBAUTHORITY_MATCHER, URI_MATCHER, - URI_COMPONENTS, merge_paths - ) -from .normalizers import ( - encode_component, normalize_scheme, normalize_authority, normalize_path, - normalize_query, normalize_fragment - ) +from . import compat +from . import exceptions as exc +from . import misc +from . import normalizers +from . import validators -class URIReference(namedtuple('URIReference', URI_COMPONENTS)): +class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS)): + """Immutable object representing a parsed URI Reference. + + .. note:: + + This class is not intended to be directly instantiated by the user. + + This object exposes attributes for the following components of a + URI: + + - scheme + - authority + - path + - query + - fragment + + .. attribute:: scheme + + The scheme that was parsed for the URI Reference. For example, + ``http``, ``https``, ``smtp``, ``imap``, etc. + + .. attribute:: authority + + Component of the URI that contains the user information, host, + and port sub-components. For example, + ``google.com``, ``127.0.0.1:5000``, ``username@[::1]``, + ``username:password@example.com:443``, etc. + + .. attribute:: path + + The path that was parsed for the given URI Reference. For example, + ``/``, ``/index.php``, etc. + + .. attribute:: query + + The query component for a given URI Reference. 
For example, ``a=b``, + ``a=b%20c``, ``a=b+c``, ``a=b,c=d,e=%20f``, etc. + + .. attribute:: fragment + + The fragment component of a URI. For example, ``section-3.1``. + + This class also provides extra attributes for easier access to information + like the subcomponents of the authority component. + + .. attribute:: userinfo + + The user information parsed from the authority. + + .. attribute:: host + + The hostname, IPv4, or IPv6 address parsed from the authority. + + .. attribute:: port + + The port parsed from the authority. + """ + slots = () def __new__(cls, scheme, authority, path, query, fragment, encoding='utf-8'): + """Create a new URIReference.""" ref = super(URIReference, cls).__new__( cls, scheme or None, @@ -44,6 +97,7 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): return ref def __eq__(self, other): + """Compare this reference to another.""" other_ref = other if isinstance(other, tuple): other_ref = URIReference(*other) @@ -67,49 +121,52 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): :param str encoding: The encoding of the string provided :returns: :class:`URIReference` or subclass thereof """ - uri_string = to_str(uri_string, encoding) + uri_string = compat.to_str(uri_string, encoding) - split_uri = URI_MATCHER.match(uri_string).groupdict() - return cls(split_uri['scheme'], split_uri['authority'], - encode_component(split_uri['path'], encoding), - encode_component(split_uri['query'], encoding), - encode_component(split_uri['fragment'], encoding), encoding) + split_uri = misc.URI_MATCHER.match(uri_string).groupdict() + return cls( + split_uri['scheme'], split_uri['authority'], + normalizers.encode_component(split_uri['path'], encoding), + normalizers.encode_component(split_uri['query'], encoding), + normalizers.encode_component(split_uri['fragment'], encoding), + encoding, + ) def authority_info(self): - """Returns a dictionary with the ``userinfo``, ``host``, and ``port``. 
+ """Return a dictionary with the ``userinfo``, ``host``, and ``port``. - If the authority is not valid, it will raise a ``InvalidAuthority`` - Exception. + If the authority is not valid, it will raise a + :class:`~rfc3986.exceptions.InvalidAuthority` Exception. :returns: ``{'userinfo': 'username:password', 'host': 'www.example.com', 'port': '80'}`` :rtype: dict - :raises InvalidAuthority: If the authority is not ``None`` and can not - be parsed. + :raises rfc3986.exceptions.InvalidAuthority: + If the authority is not ``None`` and can not be parsed. """ if not self.authority: return {'userinfo': None, 'host': None, 'port': None} - match = SUBAUTHORITY_MATCHER.match(self.authority) + match = misc.SUBAUTHORITY_MATCHER.match(self.authority) if match is None: # In this case, we have an authority that was parsed from the URI # Reference, but it cannot be further parsed by our - # SUBAUTHORITY_MATCHER. In this case it must not be a valid + # misc.SUBAUTHORITY_MATCHER. In this case it must not be a valid # authority. - raise InvalidAuthority(self.authority.encode(self.encoding)) + raise exc.InvalidAuthority(self.authority.encode(self.encoding)) # We had a match, now let's ensure that it is actually a valid host # address if it is IPv4 matches = match.groupdict() host = matches.get('host') - if (host and IPv4_MATCHER.match(host) and not - valid_ipv4_host_address(host)): + if (host and misc.IPv4_MATCHER.match(host) and not + validators.valid_ipv4_host_address(host)): # If we have a host, it appears to be IPv4 and it does not have # valid bytes, it is an InvalidAuthority. 
- raise InvalidAuthority(self.authority.encode(self.encoding)) + raise exc.InvalidAuthority(self.authority.encode(self.encoding)) return matches @@ -118,16 +175,16 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): """If present, a string representing the host.""" try: authority = self.authority_info() - except InvalidAuthority: + except exc.InvalidAuthority: return None return authority['host'] @property def port(self): - """If present, the port (as a string) extracted from the authority.""" + """If present, the port extracted from the authority.""" try: authority = self.authority_info() - except InvalidAuthority: + except exc.InvalidAuthority: return None return authority['port'] @@ -136,7 +193,7 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): """If present, the userinfo extracted from the authority.""" try: authority = self.authority_info() - except InvalidAuthority: + except exc.InvalidAuthority: return None return authority['userinfo'] @@ -148,10 +205,10 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): :returns: ``True`` if it is an absolute URI, ``False`` otherwise. :rtype: bool """ - return bool(ABSOLUTE_URI_MATCHER.match(self.unsplit())) + return bool(misc.ABSOLUTE_URI_MATCHER.match(self.unsplit())) def is_valid(self, **kwargs): - """Determines if the URI is valid. + """Determine if the URI is valid. :param bool require_scheme: Set to ``True`` if you wish to require the presence of the scheme component. @@ -175,82 +232,69 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): ] return all(v(r) for v, r in validators) - def _is_valid(self, value, matcher, require): - if require: - return (value is not None - and matcher.match(value)) - - # require is False and value is not None - return value is None or matcher.match(value) - def authority_is_valid(self, require=False): - """Determines if the authority component is valid. + """Determine if the authority component is valid. 
- :param str require: Set to ``True`` to require the presence of this - component. - :returns: ``True`` if the authority is valid. ``False`` otherwise. - :rtype: bool + :param bool require: + Set to ``True`` to require the presence of this component. + :returns: + ``True`` if the authority is valid. ``False`` otherwise. + :rtype: + bool """ try: self.authority_info() - except InvalidAuthority: + except exc.InvalidAuthority: return False - is_valid = self._is_valid(self.authority, - SUBAUTHORITY_MATCHER, - require) - - # Ensure that IPv4 addresses have valid bytes - if is_valid and self.host and IPv4_MATCHER.match(self.host): - return valid_ipv4_host_address(self.host) - - # Perhaps the host didn't exist or if it did, it wasn't an IPv4-like - # address. In either case, we want to rely on the `_is_valid` check, - # so let's return that. - return is_valid + return validators.authority_is_valid( + self.authority, + host=self.host, + require=require, + ) def scheme_is_valid(self, require=False): - """Determines if the scheme component is valid. + """Determine if the scheme component is valid. :param str require: Set to ``True`` to require the presence of this component. :returns: ``True`` if the scheme is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.scheme, SCHEME_MATCHER, require) + return validators.scheme_is_valid(self.scheme, require) def path_is_valid(self, require=False): - """Determines if the path component is valid. + """Determine if the path component is valid. :param str require: Set to ``True`` to require the presence of this component. :returns: ``True`` if the path is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.path, PATH_MATCHER, require) + return validators.path_is_valid(self.path, require) def query_is_valid(self, require=False): - """Determines if the query component is valid. + """Determine if the query component is valid. 
:param str require: Set to ``True`` to require the presence of this component. :returns: ``True`` if the query is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.query, QUERY_MATCHER, require) + return validators.query_is_valid(self.query, require) def fragment_is_valid(self, require=False): - """Determines if the fragment component is valid. + """Determine if the fragment component is valid. :param str require: Set to ``True`` to require the presence of this component. :returns: ``True`` if the fragment is valid. ``False`` otherwise. :rtype: bool """ - return self._is_valid(self.fragment, FRAGMENT_MATCHER, require) + return validators.fragment_is_valid(self.fragment, require) def normalize(self): - """Normalize this reference as described in Section 6.2.2 + """Normalize this reference as described in Section 6.2.2. This is not an in-place normalization. Instead this creates a new URIReference. @@ -260,12 +304,12 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): """ # See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in # this method. - return URIReference(normalize_scheme(self.scheme or ''), - normalize_authority( + return URIReference(normalizers.normalize_scheme(self.scheme or ''), + normalizers.normalize_authority( (self.userinfo, self.host, self.port)), - normalize_path(self.path or ''), - normalize_query(self.query), - normalize_fragment(self.fragment), + normalizers.normalize_path(self.path or ''), + normalizers.normalize_query(self.query), + normalizers.normalize_fragment(self.fragment), self.encoding) def normalized_equality(self, other_ref): @@ -291,13 +335,14 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): :returns: A new URIReference which is the result of resolving this reference using ``base_uri``. :rtype: :class:`URIReference` - :raises ResolutionError: If the ``base_uri`` is not an absolute URI. 
+ :raises rfc3986.exceptions.ResolutionError: + If the ``base_uri`` is not an absolute URI. """ if not isinstance(base_uri, URIReference): base_uri = URIReference.from_string(base_uri) if not base_uri.is_absolute(): - raise ResolutionError(base_uri) + raise exc.ResolutionError(base_uri) # This is optional per # http://tools.ietf.org/html/rfc3986#section-5.2.1 @@ -311,12 +356,14 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): # http://tools.ietf.org/html/rfc3986#page-32 if resolving.scheme is not None: - target = resolving.copy_with(path=normalize_path(resolving.path)) + target = resolving.copy_with( + path=normalizers.normalize_path(resolving.path) + ) else: if resolving.authority is not None: target = resolving.copy_with( scheme=base_uri.scheme, - path=normalize_path(resolving.path) + path=normalizers.normalize_path(resolving.path) ) else: if resolving.path is None: @@ -332,10 +379,10 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): ) else: if resolving.path.startswith('/'): - path = normalize_path(resolving.path) + path = normalizers.normalize_path(resolving.path) else: - path = normalize_path( - merge_paths(base_uri, resolving.path) + path = normalizers.normalize_path( + misc.merge_paths(base_uri, resolving.path) ) target = resolving.copy_with( scheme=base_uri.scheme, @@ -365,8 +412,26 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): result_list.extend(['#', self.fragment]) return ''.join(result_list) - def copy_with(self, scheme=None, authority=None, path=None, query=None, - fragment=None): + def copy_with(self, scheme=misc.UseExisting, authority=misc.UseExisting, + path=misc.UseExisting, query=misc.UseExisting, + fragment=misc.UseExisting): + """Create a copy of this reference with the new components. + + :param str scheme: + (optional) The scheme to use for the new reference. + :param str authority: + (optional) The authority to use for the new reference. 
+ :param str path: + (optional) The path to use for the new reference. + :param str query: + (optional) The query to use for the new reference. + :param str fragment: + (optional) The fragment to use for the new reference. + :returns: + New URIReference with provided components. + :rtype: + URIReference + """ attributes = { 'scheme': scheme, 'authority': authority, @@ -375,14 +440,8 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)): 'fragment': fragment, } for key, value in list(attributes.items()): - if value is None: + if value is misc.UseExisting: del attributes[key] uri = self._replace(**attributes) uri.encoding = self.encoding return uri - - -def valid_ipv4_host_address(host): - # If the host exists, and it might be IPv4, check each byte in the - # address. - return all([0 <= int(byte, base=10) <= 255 for byte in host.split('.')]) diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py new file mode 100644 index 0000000..8e7eb0b --- /dev/null +++ b/src/rfc3986/validators.py @@ -0,0 +1,311 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2017 Ian Cordasco +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing the validation logic for rfc3986.""" +from . import exceptions +from . import misc +from . import normalizers + + +class Validator(object): + """Object used to configure validation of all objects in rfc3986. 
+ + Example usage:: + + >>> from rfc3986 import api, validators + >>> uri = api.uri_reference('https://github.com/') + >>> validator = validators.Validator().require_presence_of( + ... 'scheme', 'host', 'path', + ... ).allow_schemes( + ... 'http', 'https', + ... ).allow_hosts( + ... '127.0.0.1', 'github.com', + ... ) + >>> validator.validate(uri) + >>> invalid_uri = api.uri_reference('imap://mail.google.com') + >>> validator.validate(invalid_uri) + Traceback (most recent call last): + ... + rfc3986.exceptions.MissingComponentError: ('path was required but + missing', URIReference(scheme=u'imap', authority=u'mail.google.com', + path=None, query=None, fragment=None), ['path']) + + """ + + COMPONENT_NAMES = frozenset([ + 'scheme', + 'userinfo', + 'host', + 'port', + 'path', + 'query', + 'fragment', + ]) + + def __init__(self): + """Initialize our default validations.""" + self.allowed_schemes = set() + self.allowed_hosts = set() + self.allowed_ports = set() + self.allow_password = True + self.required_components = { + 'scheme': False, + 'userinfo': False, + 'host': False, + 'port': False, + 'path': False, + 'query': False, + 'fragment': False, + } + + def allow_schemes(self, *schemes): + """Require the scheme to be one of the provided schemes. + + :param schemes: + Schemes, without ``://`` that are allowed. + :returns: + The validator instance. + :rtype: + Validator + """ + for scheme in schemes: + self.allowed_schemes.add(normalizers.normalize_scheme(scheme)) + return self + + def allow_hosts(self, *hosts): + """Require the host to be one of the provided hosts. + + :param hosts: + Hosts that are allowed. + :returns: + The validator instance. + :rtype: + Validator + """ + for host in hosts: + self.allowed_hosts.add(normalizers.normalize_host(host)) + return self + + def allow_ports(self, *ports): + """Require the port to be one of the provided ports. + + :param ports: + Ports that are allowed. + :returns: + The validator instance. 
+ :rtype: + Validator + """ + for port in ports: + port_int = int(port, base=10) + if 0 <= port_int <= 65535: + self.allowed_ports.add(port) + return self + + def allow_use_of_password(self): + """Allow passwords to be present in the URI.""" + self.allow_password = True + return self + + def forbid_use_of_password(self): + """Prevent passwords from being included in the URI.""" + self.allow_password = False + return self + + def require_presence_of(self, *components): + """Require the components provided. + + :param components: + Names of components from :attr:`Validator.COMPONENT_NAMES`. + :returns: + The validator instance. + :rtype: + Validator + """ + components = [c.lower() for c in components] + for component in components: + if component not in self.COMPONENT_NAMES: + raise ValueError( + '"{}" is not a valid component'.format(component) + ) + self.required_components.update({ + component: True for component in components + }) + return self + + def validate(self, uri): + """Check a URI for conditions specified on this validator. + + :param uri: + Parsed URI to validate. + :type uri: + rfc3986.uri.URIReference + :raises MissingComponentError: + When a required component is missing. + :raises UnpermittedComponentError: + When a component is not one of those allowed. + :raises PasswordForbidden: + When a password is present in the userinfo component but is + not permitted by configuration. 
+ """ + if not self.allow_password: + check_password(uri) + + required_components = [ + component + for component, required in self.required_components.items() + if required + ] + if required_components: + ensure_required_components_exist(uri, required_components) + + ensure_one_of(self.allowed_schemes, uri, 'scheme') + ensure_one_of(self.allowed_hosts, uri, 'host') + ensure_one_of(self.allowed_ports, uri, 'port') + + +def check_password(uri): + """Assert that there is no password present in the uri.""" + userinfo = uri.userinfo + if not userinfo: + return + credentials = userinfo.split(':', 1) + if len(credentials) <= 1: + return + raise exceptions.PasswordForbidden(uri) + + +def ensure_one_of(allowed_values, uri, attribute): + """Assert that the uri's attribute is one of the allowed values.""" + value = getattr(uri, attribute) + if value is not None and allowed_values and value not in allowed_values: + raise exceptions.UnpermittedComponentError( + attribute, value, allowed_values, + ) + + +def ensure_required_components_exist(uri, required_components): + """Assert that all required components are present in the URI.""" + missing_components = sorted([ + component + for component in required_components + if getattr(uri, component) is None + ]) + if missing_components: + raise exceptions.MissingComponentError(uri, *missing_components) + + +def is_valid(value, matcher, require): + """Determine if a value is valid based on the provided matcher. + + :param str value: + Value to validate. + :param matcher: + Compiled regular expression to use to validate the value. + :param require: + Whether or not the value is required. + """ + if require: + return (value is not None + and matcher.match(value)) + + # require is False and value is not None + return value is None or matcher.match(value) + + +def authority_is_valid(authority, host=None, require=False): + """Determine if the authority string is valid. + + :param str authority: + The authority to validate. 
+ :param str host: + (optional) The host portion of the authority to validate. + :param bool require: + (optional) Specify if authority must not be None. + :returns: + ``True`` if valid, ``False`` otherwise + :rtype: + bool + """ + validated = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require) + if validated and host is not None and misc.IPv4_MATCHER.match(host): + return valid_ipv4_host_address(host) + return validated + + +def scheme_is_valid(scheme, require=False): + """Determine if the scheme is valid. + + :param str scheme: + The scheme string to validate. + :param bool require: + (optional) Set to ``True`` to require the presence of a scheme. + :returns: + ``True`` if the scheme is valid. ``False`` otherwise. + :rtype: + bool + """ + return is_valid(scheme, misc.SCHEME_MATCHER, require) + + +def path_is_valid(path, require=False): + """Determine if the path component is valid. + + :param str path: + The path string to validate. + :param bool require: + (optional) Set to ``True`` to require the presence of a path. + :returns: + ``True`` if the path is valid. ``False`` otherwise. + :rtype: + bool + """ + return is_valid(path, misc.PATH_MATCHER, require) + + +def query_is_valid(query, require=False): + """Determine if the query component is valid. + + :param str query: + The query string to validate. + :param bool require: + (optional) Set to ``True`` to require the presence of a query. + :returns: + ``True`` if the query is valid. ``False`` otherwise. + :rtype: + bool + """ + return is_valid(query, misc.QUERY_MATCHER, require) + + +def fragment_is_valid(fragment, require=False): + """Determine if the fragment component is valid. + + :param str fragment: + The fragment string to validate. + :param bool require: + (optional) Set to ``True`` to require the presence of a fragment. + :returns: + ``True`` if the fragment is valid. ``False`` otherwise. 
+ :rtype: + bool + """ + return is_valid(fragment, misc.FRAGMENT_MATCHER, require) + + +def valid_ipv4_host_address(host): + """Determine if the given host is a valid IPv4 address.""" + # If the host exists, and it might be IPv4, check each byte in the + # address. + return all([0 <= int(byte, base=10) <= 255 for byte in host.split('.')]) diff --git a/tests/conftest.py b/tests/conftest.py index 6474931..7358b9e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,16 +7,30 @@ import pytest SNOWMAN = b'\xe2\x98\x83' valid_hosts = [ - '[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A]', '[::1]', - '[21DA:D3:0:2F3B:2AA:FF:FE28:9C5A]', '[FE80::2AA:FF:FE9A:4CA2]', - '[FF02::2]', '[FF02:3::5]', '[FF02:0:0:0:0:0:0:2]', - '[FF02:30:0:0:0:0:0:5]', '127.0.0.1', 'www.example.com', 'localhost', + '[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A]', + '[::1]', + '[::1%25lo]', # With ZoneID + '[FF02:0:0:0:0:0:0:2%25en01]', # With ZoneID + '[FF02:30:0:0:0:0:0:5%25en1]', # With ZoneID + '[21DA:D3:0:2F3B:2AA:FF:FE28:9C5A]', + '[FE80::2AA:FF:FE9A:4CA2]', + '[FF02::2]', + '[FF02:3::5]', + '[FF02:0:0:0:0:0:0:2]', + '[FF02:30:0:0:0:0:0:5]', + '127.0.0.1', + 'www.example.com', + 'localhost', 'http-bin.org', ] invalid_hosts = [ '[FF02::3::5]', # IPv6 can only have one :: '[FADF:01]', # Not properly compacted (missing a :) + '[FADF:01%en0]', # Not properly compacted (missing a :), Invalid ZoneID + '[FADF::01%en0]', # Invalid ZoneID separator + '[FADF::01%]', # Invalid ZoneID separator and no ZoneID + '[FADF::01%25]', # Missing ZoneID 'localhost:80:80:80', # Too many ports '256.256.256.256', # Invalid IPv4 Address SNOWMAN.decode('utf-8') diff --git a/tests/test_builder.py b/tests/test_builder.py new file mode 100644 index 0000000..251f353 --- /dev/null +++ b/tests/test_builder.py @@ -0,0 +1,165 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2017 Ian Cordasco +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing the tests for the URIBuilder object.""" +import pytest + +from rfc3986 import builder + + +def test_builder_default(): + """Verify the default values.""" + uribuilder = builder.URIBuilder() + assert uribuilder.scheme is None + assert uribuilder.userinfo is None + assert uribuilder.host is None + assert uribuilder.port is None + assert uribuilder.path is None + assert uribuilder.query is None + assert uribuilder.fragment is None + + +def test_repr(): + """Verify our repr looks like our class.""" + uribuilder = builder.URIBuilder() + assert repr(uribuilder).startswith('URIBuilder(scheme=None') + + +@pytest.mark.parametrize('scheme', [ + 'https', + 'hTTps', + 'Https', + 'HtTpS', + 'HTTPS', +]) +def test_add_scheme(scheme): + """Verify schemes are normalized when added.""" + uribuilder = builder.URIBuilder().add_scheme(scheme) + assert uribuilder.scheme == 'https' + + +@pytest.mark.parametrize('username, password, userinfo', [ + ('user', 'pass', 'user:pass'), + ('user', None, 'user'), + ('user@domain.com', 'password', 'user%40domain.com:password'), + ('user', 'pass:word', 'user:pass%3Aword'), +]) +def test_add_credentials(username, password, userinfo): + """Verify we normalize usernames and passwords.""" + uribuilder = builder.URIBuilder().add_credentials(username, password) + assert uribuilder.userinfo == userinfo + + +def test_add_credentials_requires_username(): + """Verify one needs a username to add credentials.""" + with pytest.raises(ValueError): + builder.URIBuilder().add_credentials(None, None) + + 
+@pytest.mark.parametrize('hostname', [ + 'google.com', + 'GOOGLE.COM', + 'gOOgLe.COM', + 'goOgLE.com', +]) +def test_add_host(hostname): + """Verify we normalize hostnames in add_host.""" + uribuilder = builder.URIBuilder().add_host(hostname) + assert uribuilder.host == 'google.com' + + +@pytest.mark.parametrize('port', [ + -100, + '-100', + -1, + '-1', + 65536, + '65536', + 1000000, + '1000000', + '', + 'abc', + '0b10', +]) +def test_add_invalid_port(port): + """Verify we raise a ValueError for invalid ports.""" + with pytest.raises(ValueError): + builder.URIBuilder().add_port(port) + + +@pytest.mark.parametrize('port, expected', [ + (0, '0'), + ('0', '0'), + (1, '1'), + ('1', '1'), + (22, '22'), + ('22', '22'), + (80, '80'), + ('80', '80'), + (443, '443'), + ('443', '443'), + (65535, '65535'), + ('65535', '65535'), +]) +def test_add_port(port, expected): + """Verify we normalize our port.""" + uribuilder = builder.URIBuilder().add_port(port) + assert uribuilder.port == expected + + +@pytest.mark.parametrize('path', [ + 'sigmavirus24/rfc3986', + '/sigmavirus24/rfc3986', +]) +def test_add_path(path): + """Verify we normalize our path value.""" + uribuilder = builder.URIBuilder().add_path(path) + assert uribuilder.path == '/sigmavirus24/rfc3986' + + +@pytest.mark.parametrize('query_items, expected', [ + ({'a': 'b c'}, 'a=b+c'), + ({'a': 'b+c'}, 'a=b%2Bc'), + ([('a', 'b c')], 'a=b+c'), + ([('a', 'b+c')], 'a=b%2Bc'), + ([('a', 'b'), ('c', 'd')], 'a=b&c=d'), + ([('a', 'b'), ('username', '@d')], 'a=b&username=%40d'), +]) +def test_add_query_from(query_items, expected): + """Verify the behaviour of add_query_from.""" + uribuilder = builder.URIBuilder().add_query_from(query_items) + assert uribuilder.query == expected + + +def test_add_query(): + """Verify we do not modify the provided query string.""" + uribuilder = builder.URIBuilder().add_query('username=@foo') + assert uribuilder.query == 'username=@foo' + + +def test_add_fragment(): + """Verify our handling of 
fragments.""" + uribuilder = builder.URIBuilder().add_fragment('section-2.5.1') + assert uribuilder.fragment == 'section-2.5.1' + + +def test_finalize(): + """Verify the whole thing.""" + uri = builder.URIBuilder().add_scheme('https').add_credentials( + 'sigmavirus24', 'not-my-re@l-password' + ).add_host('github.com').add_path('sigmavirus24/rfc3986').finalize( + ).unsplit() + expected = ('https://sigmavirus24:not-my-re%40l-password@github.com/' + 'sigmavirus24/rfc3986') + assert expected == uri diff --git a/tests/test_validators.py b/tests/test_validators.py new file mode 100644 index 0000000..8aef1a8 --- /dev/null +++ b/tests/test_validators.py @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- +"""Tests for the validators module.""" +import rfc3986 +from rfc3986 import exceptions +from rfc3986 import validators + +import pytest + + +def test_defaults(): + """Verify the default Validator settings.""" + validator = validators.Validator() + + assert validator.required_components == { + c: False for c in validator.COMPONENT_NAMES + } + assert validator.allow_password is True + assert validator.allowed_schemes == set() + assert validator.allowed_hosts == set() + assert validator.allowed_ports == set() + + +def test_allowing_schemes(): + """Verify the ability to select schemes to be allowed.""" + validator = validators.Validator().allow_schemes('http', 'https') + + assert 'http' in validator.allowed_schemes + assert 'https' in validator.allowed_schemes + + +def test_allowing_hosts(): + """Verify the ability to select hosts to be allowed.""" + validator = validators.Validator().allow_hosts( + 'pypi.python.org', 'pypi.org', + ) + + assert 'pypi.python.org' in validator.allowed_hosts + assert 'pypi.org' in validator.allowed_hosts + + +def test_allowing_ports(): + """Verify the ability select ports to be allowed.""" + validator = validators.Validator().allow_ports('80', '100') + + assert '80' in validator.allowed_ports + assert '100' in validator.allowed_ports + + +def 
test_requiring_invalid_component(): + """Verify that we validate required component names.""" + with pytest.raises(ValueError): + validators.Validator().require_presence_of('frob') + + +def test_use_of_password(): + """Verify the behaviour of {forbid,allow}_use_of_password.""" + validator = validators.Validator() + assert validator.allow_password is True + + validator.forbid_use_of_password() + assert validator.allow_password is False + + validator.allow_use_of_password() + assert validator.allow_password is True + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('https://user:password@github.com'), + rfc3986.uri_reference('https://user:password@github.com/path'), + rfc3986.uri_reference('https://user:password@github.com/path?query'), + rfc3986.uri_reference('https://user:password@github.com/path?query#frag'), + rfc3986.uri_reference('//user:password@github.com'), +]) +def test_forbidden_passwords(uri): + """Verify that passwords are disallowed.""" + validator = validators.Validator().forbid_use_of_password() + with pytest.raises(exceptions.PasswordForbidden): + validator.validate(uri) + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('https://user@github.com'), + rfc3986.uri_reference('https://user@github.com/path'), + rfc3986.uri_reference('https://user@github.com/path?query'), + rfc3986.uri_reference('https://user@github.com/path?query#frag'), + rfc3986.uri_reference('//user@github.com'), + rfc3986.uri_reference('//github.com'), + rfc3986.uri_reference('https://github.com'), +]) +def test_passwordless_uris_pass_validation(uri): + """Verify password-less URLs validate properly.""" + validator = validators.Validator().forbid_use_of_password() + validator.validate(uri) + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('https://'), + rfc3986.uri_reference('/path/to/resource'), +]) +def test_missing_host_component(uri): + """Verify that missing host components cause errors.""" + validator = 
validators.Validator().require_presence_of('host') + with pytest.raises(exceptions.MissingComponentError): + validator.validate(uri) + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('https://'), + rfc3986.uri_reference('//google.com'), + rfc3986.uri_reference('//google.com?query=value'), + rfc3986.uri_reference('//google.com#fragment'), + rfc3986.uri_reference('https://google.com'), + rfc3986.uri_reference('https://google.com#fragment'), + rfc3986.uri_reference('https://google.com?query=value'), +]) +def test_missing_path_component(uri): + """Verify that missing path components cause errors.""" + validator = validators.Validator().require_presence_of('path') + with pytest.raises(exceptions.MissingComponentError): + validator.validate(uri) + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('//google.com'), + rfc3986.uri_reference('//google.com?query=value'), + rfc3986.uri_reference('//google.com#fragment'), +]) +def test_multiple_missing_components(uri): + """Verify that multiple missing components are caught.""" + validator = validators.Validator().require_presence_of('scheme', 'path') + with pytest.raises(exceptions.MissingComponentError) as captured_exc: + validator.validate(uri) + exception = captured_exc.value + assert 2 == len(exception.args[-1]) + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('smtp://'), + rfc3986.uri_reference('telnet://'), +]) +def test_ensure_uri_has_a_scheme(uri): + """Verify validation with allowed schemes.""" + validator = validators.Validator().allow_schemes('https', 'http') + with pytest.raises(exceptions.UnpermittedComponentError): + validator.validate(uri) + + +@pytest.mark.parametrize('uri, failed_component', [ + (rfc3986.uri_reference('git://github.com'), 'scheme'), + (rfc3986.uri_reference('http://github.com'), 'scheme'), + (rfc3986.uri_reference('ssh://gitlab.com'), 'host'), + (rfc3986.uri_reference('https://gitlab.com'), 'host'), +]) +def test_allowed_hosts_and_schemes(uri, 
failed_component): + """Verify each of these fails.""" + validator = validators.Validator().allow_schemes( + 'https', 'ssh', + ).allow_hosts( + 'github.com', 'git.openstack.org', + ) + with pytest.raises(exceptions.UnpermittedComponentError) as caught_exc: + validator.validate(uri) + + exc = caught_exc.value + assert exc.component_name == failed_component + + +@pytest.mark.parametrize('uri', [ + rfc3986.uri_reference('https://github.com/sigmavirus24'), + rfc3986.uri_reference('ssh://github.com/sigmavirus24'), + rfc3986.uri_reference('ssh://ssh@github.com:22/sigmavirus24'), + rfc3986.uri_reference('https://github.com:443/sigmavirus24'), + rfc3986.uri_reference('https://gitlab.com/sigmavirus24'), + rfc3986.uri_reference('ssh://gitlab.com/sigmavirus24'), + rfc3986.uri_reference('ssh://ssh@gitlab.com:22/sigmavirus24'), + rfc3986.uri_reference('https://gitlab.com:443/sigmavirus24'), + rfc3986.uri_reference('https://bitbucket.org/sigmavirus24'), + rfc3986.uri_reference('ssh://bitbucket.org/sigmavirus24'), + rfc3986.uri_reference('ssh://ssh@bitbucket.org:22/sigmavirus24'), + rfc3986.uri_reference('https://bitbucket.org:443/sigmavirus24'), + rfc3986.uri_reference('https://git.openstack.org/sigmavirus24'), + rfc3986.uri_reference('ssh://git.openstack.org/sigmavirus24'), + rfc3986.uri_reference('ssh://ssh@git.openstack.org:22/sigmavirus24'), + rfc3986.uri_reference('https://git.openstack.org:443/sigmavirus24'), +]) +def test_successful_complex_validation(uri): + """Verify we do not raise ValidationErrors for good URIs.""" + validators.Validator().allow_schemes( + 'https', 'ssh', + ).allow_hosts( + 'github.com', 'bitbucket.org', 'gitlab.com', 'git.openstack.org', + ).allow_ports( + '22', '443', + ).require_presence_of( + 'scheme', 'host', 'path', + ).validate(uri) diff --git a/tox.ini b/tox.ini index 0a94d31..6752851 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py26,py27,py32,py33,py34,pypy,{py27,py34}-flake8 +envlist = 
py27,py33,py34,py35,py36,pypy,flake8 [testenv] pip_pre = False @@ -12,31 +12,38 @@ commands = deps = {[testenv]deps} commands = py.test {posargs} -[testenv:py27-flake8] -basepython = python2.7 +[testenv:flake8] +basepython = python3 deps = flake8 -commands = flake8 {posargs} rfc3986 + flake8-docstrings + flake8-import-order +commands = flake8 {posargs} src/rfc3986 -[testenv:py34-flake8] -basepython = python3.4 +[testenv:venv] +commands = {posargs} + +[testenv:build] deps = - flake8 -commands = flake8 {posargs} rfc3986 + wheel +commands = + python setup.py sdist bdist_wheel [testenv:release] deps = - wheel + {[testenv:build]deps} twine>=1.4.0 commands = - python setup.py sdist bdist_wheel + {[testenv:build]commands} twine upload {posargs:--skip-existing dist/*} [testenv:docs] deps = sphinx>=1.3.0 + sphinx-prompt commands = - sphinx-build -E -c docs -b html docs/ docs/_build/html + sphinx-build -WE -c docs/source/ -b html docs/source/ docs/build/html + sphinx-build -WE -c docs/source/ -b doctest docs/source/ docs/build/html [testenv:readme] deps = @@ -47,3 +54,15 @@ commands = [pytest] addopts = -q norecursedirs = *.egg .git .* _* + +[flake8] +exclude = + .tox, + .git, + __pycache__, + *.pyc, + *.egg-info, + .cache, + .eggs +max-complexity = 10 +import-order-style = google