Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit f6741ea

Browse files
committed
Merge pull request #95 from gsnedders/escape-characters-serializer
Fix #11 by escaping enough to be safe in legacy browsers; r=nobody!
2 parents b48d0c1 + 9b8d8eb commit f6741ea
Copy full SHA for f6741ea

File tree

Expand file tree / Collapse file tree

10 files changed

+4597
-74
lines changed
Filter options
Expand file tree / Collapse file tree

10 files changed

+4597
-74
lines changed

‎CHANGES.rst

Copy file name to clipboardExpand all lines: CHANGES.rst
+7Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ Released on XXX
3333
* **Use scripting disabled by default (as we don't implement
3434
scripting).**
3535

36+
* **Fix #11, avoiding the XSS bug potentially caused by serializer
37+
allowing attribute values to be escaped out of in old browser versions,
38+
changing the quote_attr_values option on serializer to take one of
39+
three values, "always" (the old True value), "legacy" (the new option,
40+
and the new default), and "spec" (the old False value, and the old
41+
default).**
42+
3643

3744
0.9999999/1.0b8
3845
~~~~~~~~~~~~~~~

‎html5lib/filters/lint.py

Copy file name to clipboardExpand all lines: html5lib/filters/lint.py
+6-2Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010

1111

1212
class Filter(_base.Filter):
13+
def __init__(self, source, require_matching_tags=True):
14+
super(Filter, self).__init__(source)
15+
self.require_matching_tags = require_matching_tags
16+
1317
def __iter__(self):
1418
open_elements = []
1519
for token in _base.Filter.__iter__(self):
@@ -26,7 +30,7 @@ def __iter__(self):
2630
assert type == "EmptyTag"
2731
else:
2832
assert type == "StartTag"
29-
if type == "StartTag":
33+
if type == "StartTag" and self.require_matching_tags:
3034
open_elements.append((namespace, name))
3135
for (namespace, name), value in token["data"].items():
3236
assert namespace is None or isinstance(namespace, text_type)
@@ -44,7 +48,7 @@ def __iter__(self):
4448
assert name != ""
4549
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
4650
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
47-
else:
51+
elif self.require_matching_tags:
4852
start = open_elements.pop()
4953
assert start == (namespace, name)
5054

‎html5lib/serializer/htmlserializer.py

Copy file name to clipboardExpand all lines: html5lib/serializer/htmlserializer.py
+22-10Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
from __future__ import absolute_import, division, unicode_literals
22
from six import text_type
33

4-
try:
5-
from functools import reduce
6-
except ImportError:
7-
pass
4+
import re
85

96
from ..constants import voidElements, booleanAttributes, spaceCharacters
107
from ..constants import rcdataElements, entities, xmlEntities
@@ -13,6 +10,17 @@
1310

1411
spaceCharacters = "".join(spaceCharacters)
1512

13+
quoteAttributeSpecChars = spaceCharacters + "\"'=<>`"
14+
quoteAttributeSpec = re.compile("[" + quoteAttributeSpecChars + "]")
15+
quoteAttributeLegacy = re.compile("[" + quoteAttributeSpecChars +
16+
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
17+
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
18+
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
19+
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
20+
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
21+
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
22+
"\u3000]")
23+
1624
try:
1725
from codecs import register_error, xmlcharrefreplace_errors
1826
except ImportError:
@@ -73,7 +81,7 @@ def htmlentityreplace_errors(exc):
7381
class HTMLSerializer(object):
7482

7583
# attribute quoting options
76-
quote_attr_values = False
84+
quote_attr_values = "legacy" # be secure by default
7785
quote_char = '"'
7886
use_best_quote_char = True
7987

@@ -109,9 +117,9 @@ def __init__(self, **kwargs):
109117
inject_meta_charset=True|False
110118
Whether to insert a meta element to define the character set of the
111119
document.
112-
quote_attr_values=True|False
120+
quote_attr_values="legacy"|"spec"|"always"
113121
Whether to quote attribute values that don't require quoting
114-
per HTML5 parsing rules.
122+
per legacy browser behaviour, when required by the standard, or always.
115123
quote_char=u'"'|u"'"
116124
Use given quote character for attribute quoting. Default is to
117125
use double quote unless attribute value contains a double quote,
@@ -240,11 +248,15 @@ def serialize(self, treewalker, encoding=None):
240248
(k not in booleanAttributes.get(name, tuple()) and
241249
k not in booleanAttributes.get("", tuple())):
242250
yield self.encodeStrict("=")
243-
if self.quote_attr_values or not v:
251+
if self.quote_attr_values == "always" or len(v) == 0:
244252
quote_attr = True
253+
elif self.quote_attr_values == "spec":
254+
quote_attr = quoteAttributeSpec.search(v) is not None
255+
elif self.quote_attr_values == "legacy":
256+
quote_attr = quoteAttributeLegacy.search(v) is not None
245257
else:
246-
quote_attr = reduce(lambda x, y: x or (y in v),
247-
spaceCharacters + ">\"'=", False)
258+
raise ValueError("quote_attr_values must be one of: "
259+
"'always', 'spec', or 'legacy'")
248260
v = v.replace("&", "&amp;")
249261
if self.escape_lt_in_attrs:
250262
v = v.replace("<", "&lt;")

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.