add a new HtmlRenderer subclass with XSS protections
This commit is contained in:
parent
53c2b953db
commit
1aa4e1f9e9
|
@ -228,6 +228,10 @@ Classes
|
|||
:members:
|
||||
|
||||
|
||||
.. autoclass:: SaferHtmlRenderer
|
||||
:members:
|
||||
|
||||
|
||||
.. autoclass:: HtmlTocRenderer
|
||||
:members:
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
|
||||
from ._hoedown import lib, ffi
|
||||
from .callbacks import python_callbacks, to_string
|
||||
from .constants import *
|
||||
|
@ -15,6 +17,7 @@ __all__ = [
|
|||
'BaseRenderer',
|
||||
'HtmlRenderer',
|
||||
'HtmlTocRenderer',
|
||||
'SaferHtmlRenderer',
|
||||
|
||||
'args_to_int',
|
||||
'extension_map',
|
||||
|
@ -252,3 +255,91 @@ class HtmlTocRenderer(HtmlRenderer):
|
|||
|
||||
def _new_renderer(self, flags, nesting_level):
    """
    Create the Hoedown table-of-contents renderer.

    ``flags`` is accepted but not used here; only ``nesting_level`` is
    passed on to ``lib.hoedown_html_toc_renderer_new``.
    """
    toc_renderer = lib.hoedown_html_toc_renderer_new(nesting_level)
    return toc_renderer
|
||||
|
||||
|
||||
class SaferHtmlRenderer(HtmlRenderer):
    """
    A subclass of :class:`HtmlRenderer` which adds protections against
    Cross-Site Scripting (XSS):

    1. The ``'skip-html'`` flag is turned on by default, preventing injection of
       HTML elements. If you want to escape HTML code instead of removing it
       entirely, change ``sanitization_mode`` to ``'escape'``.
    2. The URLs of links and images are filtered to prevent JavaScript injection.
       See the :meth:`check_link` method below.
    3. Optionally, the URLs can also be rewritten to counter other attacks such
       as phishing.
    """

    # Whitelist used by the default :meth:`check_link`: the URL must start
    # with ``http:`` or ``https:`` (case-insensitive).
    _allowed_url_re = re.compile(r'^https?:', re.I)

    def __init__(self, flags=(), sanitization_mode='skip-html', nesting_level=0):
        """
        :param flags: a tuple of flag names; ``sanitization_mode`` is appended
            to it before it is handed to :class:`HtmlRenderer`.
        :param sanitization_mode: the extra flag to enable, ``'skip-html'`` by
            default.
        :param nesting_level: passed through to :class:`HtmlRenderer`.
        :raises TypeError: if ``flags`` is not a tuple.
        """
        if not isinstance(flags, tuple):
            raise TypeError("`flags` should be a tuple of strings")
        HtmlRenderer.__init__(self, flags + (sanitization_mode,), nesting_level)

    def autolink(self, raw_link, is_email):
        """
        Filters links generated by the ``autolink`` extension.

        The check is applied to ``raw_link`` exactly as received; the
        ``mailto:`` prefix for e-mail addresses is only added afterwards, when
        the link is passed to :meth:`rewrite_link`. Rejected links are returned
        as escaped text wrapped in angle brackets.
        """
        if not self.check_link(raw_link):
            return escape_html('<%s>' % raw_link)
        prefix = 'mailto:' if is_email else ''
        link = escape_html(self.rewrite_link(prefix + raw_link))
        return '<a href="%s">%s</a>' % (link, escape_html(raw_link))

    def image(self, raw_link, title='', alt=''):
        """
        Filters the ``src`` attribute of an image.

        Note that filtering the source URL of an ``<img>`` tag is only a very
        basic protection, and it's mostly useless in modern browsers (they block
        JavaScript in there by default). An example of attack that filtering
        does not thwart is phishing based on HTTP Auth, see `this issue
        <https://github.com/liberapay/liberapay.com/issues/504>`_ for details.

        To mitigate this issue you should only allow images from trusted services,
        for example your own image store, or a proxy (see :meth:`rewrite_link`).

        Rejected images are returned as the escaped Markdown source text.
        """
        # Tell check_link() that this URL is an image source, exactly like
        # rewrite_link() is told below, so overrides can treat images
        # differently from regular links.
        if not self.check_link(raw_link, is_image_src=True):
            return escape_html("![%s](%s)" % (alt, raw_link))
        link = escape_html(self.rewrite_link(raw_link, is_image_src=True))
        maybe_alt = (' alt="%s"' % escape_html(alt)) if alt else ''
        maybe_title = (' title="%s"' % escape_html(title)) if title else ''
        return '<img src="%s"%s%s />' % (link, maybe_alt, maybe_title)

    def link(self, content, raw_link, title=''):
        """
        Filters links.

        For accepted links ``content`` is inserted as-is between the ``<a>``
        tags, while the URL and the title are escaped. Rejected links are
        returned as the escaped Markdown source text.
        """
        if not self.check_link(raw_link):
            return escape_html("[%s](%s)" % (content, raw_link))
        link = escape_html(self.rewrite_link(raw_link))
        maybe_title = (' title="%s"' % escape_html(title)) if title else ''
        # `content` is passed as a format argument rather than concatenated
        # into the format string: otherwise a '%' in the link text would be
        # interpreted as a conversion specifier.
        return '<a href="%s"%s>%s</a>' % (link, maybe_title, content)

    def check_link(self, link, is_image_src=False):
        """
        This method is used to check a URL.

        Returns :obj:`True` if the URL is "safe", :obj:`False` otherwise.

        The default implementation only allows HTTP and HTTPS links. Using a
        blacklist approach is not recommended, see the
        `OWASP XSS Filter Evasion Cheat Sheet
        <https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet>`_ for
        an illustration of why.

        ``is_image_src`` is :obj:`True` when the URL is the source of an
        image; the default implementation ignores it.
        """
        return bool(self._allowed_url_re.match(link))

    def rewrite_link(self, link, is_image_src=False):
        """
        This method is called to rewrite URLs.

        The default implementation simply returns the given link.

        ``is_image_src`` is :obj:`True` when the URL is the source of an
        image; the default implementation ignores it.
        """
        return link
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from chibitest import TestCase, ok
|
||||
from misaka import escape_html
|
||||
from misaka import escape_html, Markdown, SaferHtmlRenderer
|
||||
|
||||
|
||||
class EscapeHtmlTest(TestCase):
|
||||
|
@ -10,3 +10,91 @@ class EscapeHtmlTest(TestCase):
|
|||
|
||||
def test_escape_html_slash(self):
    # The expected value spells every special character of the input,
    # including the trailing slash, as an HTML entity.
    ok(escape_html('a&<>"\'/', True)) == 'a&amp;&lt;&gt;&quot;&#39;&#47;'
|
||||
|
||||
|
||||
# Converters shared by the tests below: `render` uses the renderer's default
# sanitization mode, `render_escape` is built with sanitization_mode='escape'.
_default_renderer = SaferHtmlRenderer()
render = Markdown(_default_renderer)
_escaping_renderer = SaferHtmlRenderer(sanitization_mode='escape')
render_escape = Markdown(_escaping_renderer)
|
||||
|
||||
|
||||
class SaferHtmlRendererTest(TestCase):
    """Checks the output of ``render`` and ``render_escape`` on hostile input."""

    def test_html_skip(self):
        # Default mode: HTML tags are dropped, their text content is kept.
        actual = render('Example <script>alert(1);</script>')
        expected = '<p>Example alert(1);</p>\n'
        ok(actual).diff(expected)

        # Removing the inner tag must not leave a working outer tag behind.
        html = render('<sc<script>ript>xss</sc</script>ript>')
        ok(html).not_contains('<sc')
        ok(html).not_contains('ript>')

        actual = render('<span><a href="javascript:xss">foo</a></span>')
        expected = '<p>foo</p>\n'
        ok(actual).diff(expected)

    def test_html_escape(self):
        # 'escape' mode: HTML is kept as escaped text instead of being removed.
        supplied = 'Example <script>alert(1);</script>'
        expected = '<p>%s</p>\n' % escape_html(supplied)
        ok(render_escape(supplied)).diff(expected)

        html = render_escape('<sc<script>ript>xss</sc</script>ript>')
        ok(html).not_contains('<sc')
        ok(html).not_contains('ript>')

        supplied = '<span><a href="javascript:xss">foo</a></span>'
        expected = '<p>%s</p>\n' % escape_html(supplied)
        ok(render_escape(supplied)).diff(expected)

    def test_autolink_filtering_with_nice_data(self):
        for url in ('http://a', "https://b?x&y"):
            actual = render('<%s>' % url)
            expected = '<p><a href="{0}">{0}</a></p>\n'.format(escape_html(url))
            ok(actual).diff(expected)

        # An e-mail autolink is expected to come out as escaped text.
        supplied = "<alice@example.net>"
        expected = '<p>%s</p>\n' % escape_html(supplied)
        ok(render_escape(supplied)).diff(expected)

    def test_autolink_filtering_with_naughty_data(self):
        actual = render('<javascript:foo>')
        expected = '<p>&lt;javascript:foo&gt;</p>\n'
        ok(actual).diff(expected)

        # The encoded form must not be turned back into the plain scheme.
        link = 'javascript:0'
        encoded_link = ''.join('&x{0:x};'.format(ord(c)) for c in link)
        html = render('<%s>' % encoded_link)
        ok(html).not_contains(link)

    def test_link_filtering_with_nice_data(self):
        for url in ('http://a', 'https://b'):
            actual = render("['foo](%s \"bar'\")" % url)
            expected = '<p><a href="{0}" title="bar&#39;">&#39;foo</a></p>\n'.format(url)
            ok(actual).diff(expected)

    def test_link_filtering_with_naughty_data(self):
        supplied = '[foo](javascript:xss)'
        expected = '<p>%s</p>\n' % escape_html(supplied)
        ok(render(supplied)).diff(expected)

        # Schemes that are not explicitly allowed are rejected as well.
        supplied = '[foo](unknown:bar)'
        expected = '<p>%s</p>\n' % escape_html(supplied)
        ok(render(supplied)).diff(expected)

        # Attempts to break out of the attribute or the tag.
        html = render('[" xss><xss>]("><xss>)')
        ok(html).not_contains('<xss>')
        ok(html).not_contains('" xss')
        html = render('[" xss><xss>](https:"><xss>)')
        ok(html).not_contains('<xss>')
        ok(html).not_contains('" xss')

    def test_image_src_filtering_with_nice_data(self):
        actual = render('![](http:"foo")')
        expected = '<p><img src="http:&quot;foo&quot;" /></p>\n'
        ok(actual).diff(expected)

        actual = render('!["bar"](https://example.org/ "\'title\'")')
        expected = '<p><img src="https://example.org/" alt="&quot;bar&quot;" title="&#39;title&#39;" /></p>\n'
        ok(actual).diff(expected)

    def test_image_src_filtering_with_naughty_data(self):
        actual = render('![foo](javascript:foo)')
        expected = '<p>![foo](javascript:foo)</p>\n'
        ok(actual).diff(expected)
|
||||
|
|
Loading…
Reference in New Issue