fix: URI template field parsing bugs (#1019)
Fix the following bugs: * Only the '.' special character is properly handled, even though some URL schemas, such as OData, make use of other characters, like parens. * Using duplicate field names leads to a nasty re error being raised in the case of them being contained to the same path segment, and a silent overwriting of param values in the case of field values being duplicated across multiple segments.
This commit is contained in:
parent
082dca5991
commit
b1517e8104
|
@ -341,9 +341,11 @@ class API(object):
|
|||
pass
|
||||
|
||||
Individual path segments may contain one or more field
|
||||
expressions::
|
||||
expressions, and fields need not span the entire path
|
||||
segment. For example::
|
||||
|
||||
/repos/{org}/{repo}/compare/{usr0}:{branch0}...{usr1}:{branch1}
|
||||
/serviceRoot/People('{name}')
|
||||
|
||||
Note:
|
||||
Because field names correspond to argument names in responder
|
||||
|
|
|
@ -18,7 +18,8 @@ import keyword
|
|||
import re
|
||||
|
||||
|
||||
TAB_STR = ' ' * 4
|
||||
_FIELD_REGEX = re.compile('{([^}]*)}')
|
||||
_TAB_STR = ' ' * 4
|
||||
|
||||
|
||||
class CompiledRouter(object):
|
||||
|
@ -57,13 +58,27 @@ class CompiledRouter(object):
|
|||
raise ValueError('URI templates may not include whitespace.')
|
||||
|
||||
# NOTE(kgriffs): Ensure fields are valid Python identifiers,
|
||||
# since they will be passed as kwargs to responders.
|
||||
fields = re.findall('{([^}]*)}', uri_template)
|
||||
for field in fields:
|
||||
is_identifier = re.match('[A-Za-z_][A-Za-z0-9_]*$', field)
|
||||
if not is_identifier or field in keyword.kwlist:
|
||||
# since they will be passed as kwargs to responders. Also
|
||||
# ensure there are no duplicate names, since that causes the
|
||||
# following problems:
|
||||
#
|
||||
# 1. For simple nodes, values from deeper nodes overwrite
|
||||
# values from more shallow nodes.
|
||||
# 2. For complex nodes, re.compile() raises a nasty error
|
||||
#
|
||||
fields = _FIELD_REGEX.findall(uri_template)
|
||||
used_names = set()
|
||||
for name in fields:
|
||||
is_identifier = re.match('[A-Za-z_][A-Za-z0-9_]*$', name)
|
||||
if not is_identifier or name in keyword.kwlist:
|
||||
raise ValueError('Field names must be valid identifiers '
|
||||
'(`{}` invalid).'.format(field))
|
||||
"('{}' is not valid).".format(name))
|
||||
|
||||
if name in used_names:
|
||||
raise ValueError('Field names may not be duplicated '
|
||||
"('{}' was used more than once)".format(name))
|
||||
|
||||
used_names.add(name)
|
||||
|
||||
path = uri_template.strip('/').split('/')
|
||||
|
||||
|
@ -141,7 +156,7 @@ class CompiledRouter(object):
|
|||
"""Generates Python code for a routing tree or subtree."""
|
||||
|
||||
def line(text, indent_offset=0):
|
||||
pad = TAB_STR * (indent + indent_offset)
|
||||
pad = _TAB_STR * (indent + indent_offset)
|
||||
self._code_lines.append(pad + text)
|
||||
|
||||
# NOTE(kgriffs): Base case
|
||||
|
@ -246,14 +261,14 @@ class CompiledRouter(object):
|
|||
self._expressions = []
|
||||
self._code_lines = [
|
||||
'def find(path, return_values, expressions, params):',
|
||||
TAB_STR + 'path_len = len(path)',
|
||||
_TAB_STR + 'path_len = len(path)',
|
||||
]
|
||||
|
||||
self._compile_tree(self._roots)
|
||||
|
||||
self._code_lines.append(
|
||||
# PERF(kgriffs): Explicit return of None is faster than implicit
|
||||
TAB_STR + 'return None'
|
||||
_TAB_STR + 'return None'
|
||||
)
|
||||
|
||||
self._src = '\n'.join(self._code_lines)
|
||||
|
@ -267,8 +282,6 @@ class CompiledRouter(object):
|
|||
class CompiledRouterNode(object):
|
||||
"""Represents a single URI segment in a URI."""
|
||||
|
||||
_regex_vars = re.compile('{([-_a-zA-Z0-9]+)}')
|
||||
|
||||
def __init__(self, raw_segment,
|
||||
method_map=None, resource=None, uri_template=None):
|
||||
self.children = []
|
||||
|
@ -282,39 +295,50 @@ class CompiledRouterNode(object):
|
|||
self.is_complex = False
|
||||
self.var_name = None
|
||||
|
||||
seg = raw_segment.replace('.', '\\.')
|
||||
# NOTE(kgriffs): CompiledRouter.add_route validates field names,
|
||||
# so here we can just assume they are OK and use the simple
|
||||
# _FIELD_REGEX to match them.
|
||||
matches = list(_FIELD_REGEX.finditer(raw_segment))
|
||||
|
||||
matches = list(self._regex_vars.finditer(seg))
|
||||
if matches:
|
||||
if not matches:
|
||||
self.is_var = False
|
||||
else:
|
||||
self.is_var = True
|
||||
# NOTE(richardolsson): if there is a single variable and it spans
|
||||
# the entire segment, the segment is uncomplex and the variable
|
||||
# name is simply the string contained within curly braces.
|
||||
if len(matches) == 1 and matches[0].span() == (0, len(seg)):
|
||||
|
||||
if len(matches) == 1 and matches[0].span() == (0, len(raw_segment)):
|
||||
# NOTE(richardolsson): if there is a single variable and
|
||||
# it spans the entire segment, the segment is not
|
||||
# complex and the variable name is simply the string
|
||||
# contained within curly braces.
|
||||
self.is_complex = False
|
||||
self.var_name = raw_segment[1:-1]
|
||||
else:
|
||||
# NOTE(richardolsson): Complex segments need to be converted
|
||||
# into regular expressions will be used to match and extract
|
||||
# variable values. The regular expressions contain both
|
||||
# literal spans and named group expressions for the variables.
|
||||
# NOTE(richardolsson): Complex segments need to be
|
||||
# converted into regular expressions in order to match
|
||||
# and extract variable values. The regular expressions
|
||||
# contain both literal spans and named group expressions
|
||||
# for the variables.
|
||||
|
||||
# NOTE(kgriffs): Don't use re.escape() since we do not
|
||||
# want to escape '{' or '}', and we don't want to
|
||||
# introduce any unexpected side-effects by escaping
|
||||
# non-ASCII characters (it is probably safe, but let's
|
||||
# not take that chance in a minor point release).
|
||||
#
|
||||
# NOTE(kgriffs): The substitution template parser in the
|
||||
# re library does not look ahead when collapsing '\\':
|
||||
# therefore in the case of r'\\g<0>' the first r'\\'
|
||||
# would be consumed and collapsed to r'\', and then the
|
||||
# parser would examine 'g<0>' and not realize it is a
|
||||
# group-escape sequence. So we add an extra backslash to
|
||||
# trick the parser into doing the right thing.
|
||||
escaped_segment = re.sub(r'[\.\(\)\[\]\?\$\*\+\^\|]', r'\\\g<0>', raw_segment)
|
||||
|
||||
seg_pattern = _FIELD_REGEX.sub(r'(?P<\1>.+)', escaped_segment)
|
||||
seg_pattern = '^' + seg_pattern + '$'
|
||||
|
||||
self.is_complex = True
|
||||
seg_fields = []
|
||||
prev_end_idx = 0
|
||||
for match in matches:
|
||||
var_start_idx, var_end_idx = match.span()
|
||||
seg_fields.append(seg[prev_end_idx:var_start_idx])
|
||||
|
||||
var_name = match.groups()[0].replace('-', '_')
|
||||
seg_fields.append('(?P<%s>[^/]+)' % var_name)
|
||||
|
||||
prev_end_idx = var_end_idx
|
||||
|
||||
seg_fields.append(seg[prev_end_idx:])
|
||||
seg_pattern = ''.join(seg_fields)
|
||||
self.var_regex = re.compile(seg_pattern)
|
||||
else:
|
||||
self.is_var = False
|
||||
|
||||
def matches(self, segment):
|
||||
"""Returns True if this node matches the supplied template segment."""
|
||||
|
@ -365,8 +389,8 @@ class CompiledRouterNode(object):
|
|||
#
|
||||
if self.is_complex:
|
||||
if other.is_complex:
|
||||
return (self._regex_vars.sub('v', self.raw_segment) ==
|
||||
self._regex_vars.sub('v', segment))
|
||||
return (_FIELD_REGEX.sub('v', self.raw_segment) ==
|
||||
_FIELD_REGEX.sub('v', segment))
|
||||
|
||||
return False
|
||||
else:
|
||||
|
|
|
@ -105,7 +105,7 @@ class ResourceWithId(object):
|
|||
# =====================================================================
|
||||
|
||||
|
||||
def test_regression_versioned_url():
|
||||
def test_user_regression_versioned_url():
|
||||
router = DefaultRouter()
|
||||
router.add_route('/{version}/messages', {}, ResourceWithId(2))
|
||||
|
||||
|
@ -127,7 +127,7 @@ def test_regression_versioned_url():
|
|||
assert route is None
|
||||
|
||||
|
||||
def test_regression_case_recipes():
|
||||
def test_user_regression_recipes():
|
||||
router = DefaultRouter()
|
||||
router.add_route(
|
||||
'/recipes/{activity}/{type_id}', {}, ResourceWithId(1))
|
||||
|
@ -144,6 +144,63 @@ def test_regression_case_recipes():
|
|||
assert route is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize('uri_template,path,expected_params', [
|
||||
('/serviceRoot/People|{field}', '/serviceRoot/People|susie', {'field': 'susie'}),
|
||||
('/serviceRoot/People[{field}]', "/serviceRoot/People['calvin']", {'field': "'calvin'"}),
|
||||
('/serviceRoot/People({field})', "/serviceRoot/People('hobbes')", {'field': "'hobbes'"}),
|
||||
('/serviceRoot/People({field})', "/serviceRoot/People('hob)bes')", {'field': "'hob)bes'"}),
|
||||
('/serviceRoot/People({field})(z)', '/serviceRoot/People(hobbes)(z)', {'field': 'hobbes'}),
|
||||
("/serviceRoot/People('{field}')", "/serviceRoot/People('rosalyn')", {'field': 'rosalyn'}),
|
||||
('/^{field}', '/^42', {'field': '42'}),
|
||||
('/+{field}', '/+42', {'field': '42'}),
|
||||
(
|
||||
'/foo/{first}_{second}/bar',
|
||||
'/foo/abc_def_ghijk/bar',
|
||||
|
||||
# NOTE(kgriffs): The regex pattern is greedy, so this is
|
||||
# expected. We can not change this behavior in a minor
|
||||
# release, since it would be a breaking change. If there
|
||||
# is enough demand for it, we could introduce an option
|
||||
# to toggle this behavior.
|
||||
{'first': 'abc_def', 'second': 'ghijk'},
|
||||
),
|
||||
|
||||
# NOTE(kgriffs): Why someone would use a question mark like this
|
||||
# I have no idea (esp. since it would have to be encoded to avoid
|
||||
# being mistaken for the query string separator). Including it only
|
||||
# for completeness.
|
||||
('/items/{x}?{y}', '/items/1080?768', {'x': '1080', 'y': '768'}),
|
||||
|
||||
('/items/{x}|{y}', '/items/1080|768', {'x': '1080', 'y': '768'}),
|
||||
('/items/{x},{y}', '/items/1080,768', {'x': '1080', 'y': '768'}),
|
||||
('/items/{x}^^{y}', '/items/1080^^768', {'x': '1080', 'y': '768'}),
|
||||
('/items/{x}*{y}*', '/items/1080*768*', {'x': '1080', 'y': '768'}),
|
||||
('/thing-2/something+{field}+', '/thing-2/something+42+', {'field': '42'}),
|
||||
('/thing-2/something*{field}/notes', '/thing-2/something*42/notes', {'field': '42'}),
|
||||
(
|
||||
'/thing-2/something+{field}|{q}/notes',
|
||||
'/thing-2/something+else|z/notes',
|
||||
{'field': 'else', 'q': 'z'},
|
||||
),
|
||||
(
|
||||
"serviceRoot/$metadata#Airports('{field}')/Name",
|
||||
"serviceRoot/$metadata#Airports('KSFO')/Name",
|
||||
{'field': 'KSFO'},
|
||||
),
|
||||
])
|
||||
def test_user_regression_special_chars(uri_template, path, expected_params):
|
||||
router = DefaultRouter()
|
||||
|
||||
router.add_route(uri_template, {}, ResourceWithId(1))
|
||||
|
||||
route = router.find(path)
|
||||
assert route is not None
|
||||
|
||||
resource, __, params, __ = route
|
||||
assert resource.resource_id == 1
|
||||
assert params == expected_params
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# Other tests
|
||||
# =====================================================================
|
||||
|
@ -168,6 +225,33 @@ def test_root_path():
|
|||
assert resource.resource_id == 42
|
||||
|
||||
|
||||
@pytest.mark.parametrize('uri_template', [
|
||||
'/{field}{field}',
|
||||
'/{field}...{field}',
|
||||
'/{field}/{another}/{field}',
|
||||
'/{field}/something/something/{field}/something',
|
||||
])
|
||||
def test_duplicate_field_names(uri_template):
|
||||
router = DefaultRouter()
|
||||
with pytest.raises(ValueError):
|
||||
router.add_route(uri_template, {}, ResourceWithId(1))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('uri_template,path', [
|
||||
('/items/thing', '/items/t'),
|
||||
('/items/{x}|{y}|', '/items/1080|768'),
|
||||
('/items/{x}*{y}foo', '/items/1080*768foobar'),
|
||||
('/items/{x}*768*', '/items/1080*768***'),
|
||||
])
|
||||
def test_match_entire_path(uri_template, path):
|
||||
router = DefaultRouter()
|
||||
|
||||
router.add_route(uri_template, {}, ResourceWithId(1))
|
||||
|
||||
route = router.find(path)
|
||||
assert route is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize('uri_template', [
|
||||
'/teams/{collision}', # simple vs simple
|
||||
'/emojis/signs/{id_too}', # another simple vs simple
|
||||
|
|
Loading…
Reference in New Issue