Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit d8d5bb6

Browse files
committed
Remove ability to use a custom tokenizer
This should be unneeded since the sanitizer changes (#110)
1 parent e65bee9 commit d8d5bb6
Copy full SHA for d8d5bb6

File tree

1 file changed

+4
-10
lines changed
Filter options

1 file changed

+4
-10
lines changed

‎html5lib/html5parser.py

Copy file name to clipboard | Expand all lines: html5lib/html5parser.py
+4 -10 | Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -59,18 +59,13 @@ class HTMLParser(object):
5959
"""HTML parser. Generates a tree structure from a stream of (possibly
6060
malformed) HTML"""
6161

62-
def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
63-
strict=False, namespaceHTMLElements=True, debug=False):
62+
def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
6463
"""
6564
strict - raise an exception when a parse error is encountered
6665
6766
tree - a treebuilder class controlling the type of tree that will be
6867
returned. Built in treebuilders can be accessed through
6968
html5lib.treebuilders.getTreeBuilder(treeType)
70-
71-
tokenizer - a class that provides a stream of tokens to the treebuilder.
72-
This may be replaced for e.g. a sanitizer which converts some tags to
73-
text
7469
"""
7570

7671
# Raise an exception on the first error encountered
@@ -79,7 +74,6 @@ def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
7974
if tree is None:
8075
tree = treebuilders.getTreeBuilder("etree")
8176
self.tree = tree(namespaceHTMLElements)
82-
self.tokenizer_class = tokenizer
8377
self.errors = []
8478

8579
self.phases = dict([(name, cls(self, self.tree)) for name, cls in
@@ -91,9 +85,9 @@ def _parse(self, stream, innerHTML=False, container="div", encoding=None,
9185
self.innerHTMLMode = innerHTML
9286
self.container = container
9387
self.scripting = scripting
94-
self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
95-
useChardet=useChardet,
96-
parser=self, **kwargs)
88+
self.tokenizer = tokenizer.HTMLTokenizer(stream, encoding=encoding,
89+
useChardet=useChardet,
90+
parser=self, **kwargs)
9791
self.reset()
9892

9993
try:

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.