Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 8f7f9f0

Browse files
committed
Move the treewalker printer to the treewalker module
1 parent 9695fc8 commit 8f7f9f0
Copy full SHA for 8f7f9f0

File tree

2 files changed

+83
-79
lines changed
Filter options

2 files changed

+83
-79
lines changed

‎html5lib/tests/test_treewalkers.py

Copy file name to clipboard
Expand all lines: html5lib/tests/test_treewalkers.py
+2 -79
Lines changed: 2 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -139,83 +139,6 @@ def GenshiAdapter(tree):
139139
"adapter": GenshiAdapter,
140140
"walker": treewalkers.getTreeWalker("genshi")}
141141

142-
143-
def concatenateCharacterTokens(tokens):
144-
charactersToken = None
145-
for token in tokens:
146-
type = token["type"]
147-
if type in ("Characters", "SpaceCharacters"):
148-
if charactersToken is None:
149-
charactersToken = {"type": "Characters", "data": token["data"]}
150-
else:
151-
charactersToken["data"] += token["data"]
152-
else:
153-
if charactersToken is not None:
154-
yield charactersToken
155-
charactersToken = None
156-
yield token
157-
if charactersToken is not None:
158-
yield charactersToken
159-
160-
161-
def convertTokens(tokens):
162-
output = []
163-
indent = 0
164-
for token in concatenateCharacterTokens(tokens):
165-
type = token["type"]
166-
if type in ("StartTag", "EmptyTag"):
167-
if (token["namespace"] and
168-
token["namespace"] != constants.namespaces["html"]):
169-
if token["namespace"] in constants.prefixes:
170-
name = constants.prefixes[token["namespace"]]
171-
else:
172-
name = token["namespace"]
173-
name += " " + token["name"]
174-
else:
175-
name = token["name"]
176-
output.append("%s<%s>" % (" " * indent, name))
177-
indent += 2
178-
attrs = token["data"]
179-
if attrs:
180-
# TODO: Remove this if statement, attrs should always exist
181-
for (namespace, name), value in sorted(attrs.items()):
182-
if namespace:
183-
if namespace in constants.prefixes:
184-
outputname = constants.prefixes[namespace]
185-
else:
186-
outputname = namespace
187-
outputname += " " + name
188-
else:
189-
outputname = name
190-
output.append("%s%s=\"%s\"" % (" " * indent, outputname, value))
191-
if type == "EmptyTag":
192-
indent -= 2
193-
elif type == "EndTag":
194-
indent -= 2
195-
elif type == "Comment":
196-
output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
197-
elif type == "Doctype":
198-
if token["name"]:
199-
if token["publicId"]:
200-
output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
201-
(" " * indent, token["name"],
202-
token["publicId"],
203-
token["systemId"] and token["systemId"] or ""))
204-
elif token["systemId"]:
205-
output.append("""%s<!DOCTYPE %s "" "%s">""" %
206-
(" " * indent, token["name"],
207-
token["systemId"]))
208-
else:
209-
output.append("%s<!DOCTYPE %s>" % (" " * indent,
210-
token["name"]))
211-
else:
212-
output.append("%s<!DOCTYPE >" % (" " * indent,))
213-
elif type in ("Characters", "SpaceCharacters"):
214-
output.append("%s\"%s\"" % (" " * indent, token["data"]))
215-
else:
216-
pass # TODO: what to do with errors?
217-
return "\n".join(output)
218-
219142
import re
220143
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
221144

@@ -265,7 +188,7 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
265188

266189
document = treeClass.get("adapter", lambda x: x)(document)
267190
try:
268-
output = convertTokens(treeClass["walker"](document))
191+
output = treewalkers.pprint(treeClass["walker"](document))
269192
output = attrlist.sub(sortattrs, output)
270193
expected = attrlist.sub(sortattrs, convertExpected(expected))
271194
diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
@@ -323,7 +246,7 @@ def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
323246
set_attribute_on_first_child(document, nom, val, treeName)
324247

325248
document = treeClass.get("adapter", lambda x: x)(document)
326-
output = convertTokens(treeClass["walker"](document))
249+
output = treewalkers.pprint(treeClass["walker"](document))
327250
output = attrlist.sub(sortattrs, output)
328251
if not output in expected:
329252
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))

‎html5lib/treewalkers/__init__.py

Copy file name to clipboard
Expand all lines: html5lib/treewalkers/__init__.py
+81 -0
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,12 @@
1010

1111
from __future__ import absolute_import, division, unicode_literals
1212

13+
__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree",
14+
"pulldom"]
15+
1316
import sys
1417

18+
from .. import constants
1519
from ..utils import default_etree
1620

1721
treeWalkerCache = {}
@@ -55,3 +59,80 @@ def getTreeWalker(treeType, implementation=None, **kwargs):
5559
# XXX: NEVER cache here, caching is done in the etree submodule
5660
return etree.getETreeModule(implementation, **kwargs).TreeWalker
5761
return treeWalkerCache.get(treeType)
62+
63+
64+
def concatenateCharacterTokens(tokens):
    """Merge each run of adjacent character tokens into a single token.

    Consecutive tokens whose ``"type"`` is ``"Characters"`` or
    ``"SpaceCharacters"`` are collapsed into one freshly created
    ``{"type": "Characters", "data": ...}`` token whose data is the
    concatenation of the run. All other tokens are yielded unchanged and
    in their original order. The input tokens are never mutated.

    :arg tokens: an iterable of token dicts, each with at least a
        ``"type"`` key (and a ``"data"`` key for character tokens)

    :returns: a generator of tokens
    """
    # Text pieces of the run currently being collected. A list (rather than
    # repeated ``+=`` on a string) keeps long runs linear in time.
    pending = []
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            pending.append(token["data"])
            continue
        if pending:
            # A non-character token ends the current run: flush it first.
            yield {"type": "Characters", "data": "".join(pending)}
            pending = []
        yield token
    if pending:
        # The stream ended in the middle of a run.
        yield {"type": "Characters", "data": "".join(pending)}
80+
81+
82+
def pprint(tokens):
    """Render a stream of tree walker tokens as an indented text tree.

    Adjacent character tokens are merged first (via
    ``concatenateCharacterTokens``). Each start/empty tag, attribute,
    comment, doctype and text token then becomes one output line, indented
    by two spaces per level of element nesting. End tags produce no line;
    they only reduce the indentation. Any other token type is ignored.

    :arg tokens: an iterable of token dicts as consumed by
        ``concatenateCharacterTokens``

    :returns: the output lines joined with ``"\\n"``
    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(tokens):
        token_type = token["type"]
        padding = " " * indent

        if token_type in ("StartTag", "EmptyTag"):
            namespace = token["namespace"]
            if namespace and namespace != constants.namespaces["html"]:
                # Non-HTML elements are shown as "<prefix> <name>", falling
                # back to the raw namespace when no prefix is registered.
                name = (constants.prefixes.get(namespace, namespace) +
                        " " + token["name"])
            else:
                name = token["name"]
            output.append("%s<%s>" % (padding, name))
            indent += 2

            attrs = token["data"]
            if attrs:
                # TODO: Remove this if statement, attrs should always exist
                attr_padding = " " * indent
                for (attr_namespace, attr_name), value in sorted(attrs.items()):
                    if attr_namespace:
                        outputname = (
                            constants.prefixes.get(attr_namespace,
                                                   attr_namespace) +
                            " " + attr_name)
                    else:
                        outputname = attr_name
                    output.append("%s%s=\"%s\"" %
                                  (attr_padding, outputname, value))

            if token_type == "EmptyTag":
                # An empty tag has no children, so close its level at once.
                indent -= 2

        elif token_type == "EndTag":
            indent -= 2

        elif token_type == "Comment":
            output.append("%s<!-- %s -->" % (padding, token["data"]))

        elif token_type == "Doctype":
            if token["name"]:
                if token["publicId"]:
                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
                                  (padding, token["name"],
                                   token["publicId"],
                                   token["systemId"] or ""))
                elif token["systemId"]:
                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
                                  (padding, token["name"],
                                   token["systemId"]))
                else:
                    output.append("%s<!DOCTYPE %s>" % (padding,
                                                       token["name"]))
            else:
                output.append("%s<!DOCTYPE >" % (padding,))

        elif token_type in ("Characters", "SpaceCharacters"):
            output.append("%s\"%s\"" % (padding, token["data"]))

        else:
            pass  # TODO: what to do with errors?

    return "\n".join(output)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.