Add support for Zone Identifiers from RFC 6874

Refs #2
This commit is contained in:
Ian Cordasco 2017-05-08 07:04:27 -05:00
parent 432f66d1a5
commit 21a1be2188
No known key found for this signature in database
GPG Key ID: 656D3395E4A9791A
4 changed files with 35 additions and 11 deletions

View File

@ -3,7 +3,8 @@
=========
|rfc3986| is a Python implementation of :rfc:`3986` including validation and
authority parsing.
authority parsing. This module also supports :rfc:`6874`, which adds support
for zone identifiers to IPv6 addresses.
The maintainers strongly suggest using `pip`_ to install |rfc3986|. For
example,

View File

@ -31,6 +31,10 @@ NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET).union('%')
# Character-class body for the RFC 3986 "unreserved" set. We need to escape
# the '-' in this case so that it stays a literal hyphen (not a range) when
# this fragment is concatenated with others inside a [...] class. A raw string
# is used so the backslash is preserved explicitly instead of relying on
# Python passing the unrecognised '\-' escape through (which emits a warning).
UNRESERVED_RE = r'A-Za-z0-9._~\-'
# Percent encoded character values: '%' followed by exactly two hex digits.
PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}'
# One path character: a single unreserved / sub-delimiter / ':' / '@'
# character, or one percent-encoded triplet. Note that '%' binds tighter than
# '+', so only the final literal (the one containing '%s') is formatted.
PCHAR = '([' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':@]|%s)' % PCT_ENCODED
# NOTE(sigmavirus24): We're going to use more strict regular expressions
# than appear in Appendix B for scheme. This will prevent over-eager
# consuming of items that aren't schemes.
@ -111,7 +115,16 @@ IPv_FUTURE_RE = 'v[0-9A-Fa-f]+.[%s]+' % (
UNRESERVED_RE + SUB_DELIMITERS_RE + ':'
)
IP_LITERAL_RE = '\[({0}|{1})\]'.format(IPv6_RE, IPv_FUTURE_RE)
# RFC 6874 Zone ID ABNF:
#   ZoneID     = 1*( unreserved / pct-encoded )
#   IPv6addrz  = IPv6address "%25" ZoneID
#   IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]"
# One or more unreserved characters or percent-encoded triplets.
ZONE_ID = '(?:[' + UNRESERVED_RE + ']|' + PCT_ENCODED + ')+'
# The separator between address and zone is the literal text "%25" (a
# percent-encoded '%'); a bare '%' is not accepted by this pattern.
IPv6_ADDRZ_RE = IPv6_RE + '%25' + ZONE_ID
# Bracketed IP literal. A raw string is used so that '\[' and '\]' reach the
# regex engine as escaped brackets without depending on Python leaving the
# unrecognised string escapes untouched (which emits a warning). The zoned
# alternative is wrapped in a non-capturing group so the concatenated
# address + "%25" + zone is treated as a single branch of the alternation.
IP_LITERAL_RE = r'\[({0}|(?:{1})|{2})\]'.format(
    IPv6_RE,
    IPv6_ADDRZ_RE,
    IPv_FUTURE_RE,
)
# Pattern for matching the host piece of the authority
HOST_RE = HOST_PATTERN = '({0}|{1}|{2})'.format(
@ -128,10 +141,6 @@ PORT_RE = '[0-9]{1,5}'
# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information
# about the path patterns defined below.
# Percent encoded character values
PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}'
PCHAR = '([' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':@]|%s)' % PCT_ENCODED
segments = {
'segment': PCHAR + '*',
# Non-zero length segment

View File

@ -51,7 +51,7 @@ URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE)
SUBAUTHORITY_MATCHER = re.compile((
'^(?:(?P<userinfo>{0})@)?' # userinfo
'(?P<host>{1}?)' # host
'(?P<host>{1})' # host
':?(?P<port>{2})?$' # port
).format(abnf_regexp.USERINFO_RE,
abnf_regexp.HOST_PATTERN,

View File

@ -7,16 +7,30 @@ import pytest
SNOWMAN = b'\xe2\x98\x83'
# Host strings treated as valid by this test module: bracketed IPv6 literals
# (full and '::'-compressed forms, some carrying a '%25'-separated zone ID),
# a dotted-quad IPv4 address, and plain host names.
# NOTE(review): how the list is consumed is outside this view — presumably it
# is parametrized into host-validation tests; confirm against the test bodies.
valid_hosts = [
'[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A]', '[::1]',
'[21DA:D3:0:2F3B:2AA:FF:FE28:9C5A]', '[FE80::2AA:FF:FE9A:4CA2]',
'[FF02::2]', '[FF02:3::5]', '[FF02:0:0:0:0:0:0:2]',
'[FF02:30:0:0:0:0:0:5]', '127.0.0.1', 'www.example.com', 'localhost',
'[21DA:00D3:0000:2F3B:02AA:00FF:FE28:9C5A]',
'[::1]',
'[::1%25lo]', # With ZoneID
'[FF02:0:0:0:0:0:0:2%25en01]', # With ZoneID
'[FF02:30:0:0:0:0:0:5%25en1]', # With ZoneID
'[21DA:D3:0:2F3B:2AA:FF:FE28:9C5A]',
'[FE80::2AA:FF:FE9A:4CA2]',
'[FF02::2]',
'[FF02:3::5]',
'[FF02:0:0:0:0:0:0:2]',
'[FF02:30:0:0:0:0:0:5]',
'127.0.0.1',
'www.example.com',
'localhost',
'http-bin.org',
]
invalid_hosts = [
'[FF02::3::5]', # IPv6 can only have one ::
'[FADF:01]', # Not properly compacted (missing a :)
'[FADF:01%en0]', # Not properly compacted (missing a :), Invalid ZoneID
'[FADF::01%en0]', # Invalid ZoneID separator
'[FADF::01%]', # Invalid ZoneID separator and no ZoneID
'[FADF::01%25]', # Missing ZoneID
'localhost:80:80:80', # Too many ports
'256.256.256.256', # Invalid IPv4 Address
SNOWMAN.decode('utf-8')