add a new HtmlRenderer subclass with XSS protections

This commit is contained in:
Changaco 2017-01-13 13:02:44 +01:00
parent 53c2b953db
commit 1aa4e1f9e9
3 changed files with 184 additions and 1 deletions

View File

@ -228,6 +228,10 @@ Classes
:members:
.. autoclass:: SaferHtmlRenderer
:members:
.. autoclass:: HtmlTocRenderer
:members:

View File

@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
import re
from ._hoedown import lib, ffi
from .callbacks import python_callbacks, to_string
from .constants import *
@ -15,6 +17,7 @@ __all__ = [
'BaseRenderer',
'HtmlRenderer',
'HtmlTocRenderer',
'SaferHtmlRenderer',
'args_to_int',
'extension_map',
@ -252,3 +255,91 @@ class HtmlTocRenderer(HtmlRenderer):
def _new_renderer(self, flags, nesting_level):
    """
    Create the underlying hoedown table-of-contents renderer.

    Only ``nesting_level`` is forwarded to hoedown; ``flags`` is not used
    by this implementation.
    """
    toc_renderer = lib.hoedown_html_toc_renderer_new(nesting_level)
    return toc_renderer
class SaferHtmlRenderer(HtmlRenderer):
    """
    A subclass of :class:`HtmlRenderer` which adds protections against
    Cross-Site Scripting (XSS):

    1. The ``'skip-html'`` flag is turned on by default, preventing injection of
       HTML elements. If you want to escape HTML code instead of removing it
       entirely, change ``sanitization_mode`` to ``'escape'``.
    2. The URLs of links and images are filtered to prevent JavaScript injection.
       See the :meth:`check_link` method below.
    3. Optionally, the URLs can also be rewritten to counter other attacks such
       as phishing. See the :meth:`rewrite_link` method below.
    """

    # Whitelist used by the default :meth:`check_link`: the URL must start
    # with ``http:`` or ``https:`` (case-insensitive).
    _allowed_url_re = re.compile(r'^https?:', re.I)

    def __init__(self, flags=(), sanitization_mode='skip-html', nesting_level=0):
        """
        Build the renderer with ``sanitization_mode`` appended to ``flags``.

        Raises :exc:`TypeError` if ``flags`` is not a tuple, because the
        sanitization flag is appended with tuple concatenation.
        """
        if not isinstance(flags, tuple):
            raise TypeError("`flags` should be a tuple of strings")
        HtmlRenderer.__init__(self, flags + (sanitization_mode,), nesting_level)

    def autolink(self, raw_link, is_email):
        """
        Filters links generated by the ``autolink`` extension.

        The check is performed on ``raw_link`` itself, before the ``mailto:``
        prefix is added, so with the default :meth:`check_link` email
        addresses are rejected and rendered as escaped text.
        """
        if not self.check_link(raw_link):
            return escape_html('<%s>' % raw_link)
        prefix = 'mailto:' if is_email else ''
        link = escape_html(self.rewrite_link(prefix + raw_link))
        return '<a href="%s">%s</a>' % (link, escape_html(raw_link))

    def image(self, raw_link, title='', alt=''):
        """
        Filters the ``src`` attribute of an image.

        Note that filtering the source URL of an ``<img>`` tag is only a very
        basic protection, and it's mostly useless in modern browsers (they block
        JavaScript in there by default). An example of attack that filtering
        does not thwart is phishing based on HTTP Auth, see `this issue
        <https://github.com/liberapay/liberapay.com/issues/504>`_ for details.

        To mitigate this issue you should only allow images from trusted services,
        for example your own image store, or a proxy (see :meth:`rewrite_link`).
        """
        # Pass `is_image_src` to the check as well as to the rewrite, so that
        # an overridden `check_link` can apply a stricter policy to images.
        if not self.check_link(raw_link, is_image_src=True):
            return escape_html("![%s](%s)" % (alt, raw_link))
        link = escape_html(self.rewrite_link(raw_link, is_image_src=True))
        maybe_alt = (' alt="%s"' % escape_html(alt)) if alt else ''
        maybe_title = (' title="%s"' % escape_html(title)) if title else ''
        return '<img src="%s"%s%s />' % (link, maybe_alt, maybe_title)

    def link(self, content, raw_link, title=''):
        """
        Filters links.

        ``content`` is inserted as-is between the ``<a>`` tags when the URL is
        accepted (presumably it is already-rendered HTML coming from the
        parser); when the URL is rejected the whole construct is escaped.
        """
        if not self.check_link(raw_link):
            return escape_html("[%s](%s)" % (content, raw_link))
        link = escape_html(self.rewrite_link(raw_link))
        maybe_title = (' title="%s"' % escape_html(title)) if title else ''
        # `content` must be a format *argument*, never part of the format
        # string: link text containing `%` would otherwise break formatting.
        return '<a href="%s"%s>%s</a>' % (link, maybe_title, content)

    def check_link(self, link, is_image_src=False):
        """
        This method is used to check a URL.

        Returns :obj:`True` if the URL is "safe", :obj:`False` otherwise.

        The default implementation only allows HTTP and HTTPS links, and
        ignores ``is_image_src``. Using a blacklist approach is not
        recommended, see the `OWASP XSS Filter Evasion Cheat Sheet
        <https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet>`_ for
        an illustration of why.
        """
        return bool(self._allowed_url_re.match(link))

    def rewrite_link(self, link, is_image_src=False):
        """
        This method is called to rewrite URLs.

        The default implementation simply returns the given link.
        """
        return link

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from chibitest import TestCase, ok
from misaka import escape_html
from misaka import escape_html, Markdown, SaferHtmlRenderer
class EscapeHtmlTest(TestCase):
@ -10,3 +10,91 @@ class EscapeHtmlTest(TestCase):
def test_escape_html_slash(self):
    """With the second argument set to True, ``/`` is expected to be escaped too."""
    escaped = escape_html('a&<>"\'/', True)
    ok(escaped) == 'a&amp;&lt;&gt;&quot;&#39;&#47;'
# Module-level renderers shared by the tests below: ``render`` uses the default
# sanitization mode of SaferHtmlRenderer, ``render_escape`` uses ``'escape'``.
render = Markdown(SaferHtmlRenderer())
render_escape = Markdown(SaferHtmlRenderer(sanitization_mode='escape'))
class SaferHtmlRendererTest(TestCase):
    """Checks the HTML sanitization and URL filtering of SaferHtmlRenderer."""

    def test_html_skip(self):
        """In the default mode raw HTML tags are removed from the output."""
        actual = render('Example <script>alert(1);</script>')
        expected = '<p>Example alert(1);</p>\n'
        ok(actual).diff(expected)

        # Removing tags must not splice the surrounding text into a new tag.
        html = render('<sc<script>ript>xss</sc</script>ript>')
        ok(html).not_contains('<sc')
        ok(html).not_contains('ript>')

        actual = render('<span><a href="javascript:xss">foo</a></span>')
        expected = '<p>foo</p>\n'
        ok(actual).diff(expected)

    def test_html_escape(self):
        """In ``'escape'`` mode raw HTML is escaped instead of removed."""
        supplied = 'Example <script>alert(1);</script>'
        expected = '<p>%s</p>\n' % escape_html(supplied)
        ok(render_escape(supplied)).diff(expected)

        html = render_escape('<sc<script>ript>xss</sc</script>ript>')
        ok(html).not_contains('<sc')
        ok(html).not_contains('ript>')

        supplied = '<span><a href="javascript:xss">foo</a></span>'
        expected = '<p>%s</p>\n' % escape_html(supplied)
        ok(render_escape(supplied)).diff(expected)

    def test_autolink_filtering_with_nice_data(self):
        """HTTP(S) autolinks become anchors; an email autolink is escaped."""
        for url in ('http://a', "https://b?x&y"):
            actual = render('<%s>' % url)
            expected = '<p><a href="{0}">{0}</a></p>\n'.format(escape_html(url))
            ok(actual).diff(expected)

        supplied = "<alice@example.net>"
        expected = '<p>%s</p>\n' % escape_html(supplied)
        ok(render_escape(supplied)).diff(expected)

    def test_autolink_filtering_with_naughty_data(self):
        """Autolinks with a non-whitelisted scheme are rendered as text."""
        actual = render('<javascript:foo>')
        expected = '<p>&lt;javascript:foo&gt;</p>\n'
        ok(actual).diff(expected)

        link = 'javascript:0'
        # Hexadecimal numeric character references are spelled `&#x..;`; the
        # `#` is required for this to be a real entity-encoding evasion attempt.
        encoded_link = ''.join('&#x{0:x};'.format(ord(c)) for c in link)
        html = render('<%s>' % encoded_link)
        ok(html).not_contains(link)

    def test_link_filtering_with_nice_data(self):
        """HTTP(S) links are rendered, with the title and text escaped."""
        for url in ('http://a', 'https://b'):
            actual = render("['foo](%s \"bar'\")" % url)
            expected = '<p><a href="{0}" title="bar&#39;">&#39;foo</a></p>\n'.format(url)
            ok(actual).diff(expected)

    def test_link_filtering_with_naughty_data(self):
        """Links with a rejected URL are rendered as escaped text."""
        # Both a known-dangerous scheme and an unknown one must be rejected.
        for supplied in ('[foo](javascript:xss)', '[foo](unknown:bar)'):
            expected = '<p>%s</p>\n' % escape_html(supplied)
            ok(render(supplied)).diff(expected)

        html = render('[" xss><xss>]("><xss>)')
        ok(html).not_contains('<xss>')
        ok(html).not_contains('" xss')

        html = render('[" xss><xss>](https:"><xss>)')
        ok(html).not_contains('<xss>')
        ok(html).not_contains('" xss')

    def test_image_src_filtering_with_nice_data(self):
        """HTTP(S) image sources are rendered, with attributes escaped."""
        actual = render('![](http:"foo")')
        expected = '<p><img src="http:&quot;foo&quot;" /></p>\n'
        ok(actual).diff(expected)

        actual = render('!["bar"](https://example.org/ "\'title\'")')
        expected = '<p><img src="https://example.org/" alt="&quot;bar&quot;" title="&#39;title&#39;" /></p>\n'
        ok(actual).diff(expected)

    def test_image_src_filtering_with_naughty_data(self):
        """Images with a rejected source URL are rendered as text."""
        actual = render('![foo](javascript:foo)')
        expected = '<p>![foo](javascript:foo)</p>\n'
        ok(actual).diff(expected)