summary | refs | log | tree | commit | diff
path: root/crocoite/html.py
diff options
context:
space:
mode:
Diffstat (limited to 'crocoite/html.py')
-rw-r--r-- crocoite/html.py 19
1 files changed, 11 insertions, 8 deletions
diff --git a/crocoite/html.py b/crocoite/html.py
index f891101..30f6ca5 100644
--- a/crocoite/html.py
+++ b/crocoite/html.py
@@ -22,6 +22,10 @@
HTML helper
"""
+from html5lib.treewalkers.base import TreeWalker
+from html5lib.filters.base import Filter
+from html5lib import constants
+
# HTML void tags, see https://html.spec.whatwg.org/multipage/syntax.html#void-elements
voidTags = {'area',
'base',
@@ -103,10 +107,7 @@ eventAttributes = {'onabort',
'onvolumechange',
'onwaiting'}
-from html5lib.treewalkers.base import TreeWalker
-from html5lib.filters.base import Filter
-from html5lib.serializer import HTMLSerializer
-from html5lib import constants
+default_namespace = constants.namespaces["html"]
class ChromeTreeWalker (TreeWalker):
"""
@@ -123,11 +124,14 @@ class ChromeTreeWalker (TreeWalker):
elif name == '#document':
for child in node.get ('children', []):
yield from self.recurse (child)
+ elif name == '#cdata-section':
+ # html5lib cannot generate cdata, so we’re faking it by using
+ # an empty tag
+ yield from self.emptyTag (default_namespace,
+ '![CDATA[' + node['nodeValue'] + ']]', {})
else:
- assert False, name
+ assert False, (name, node)
else:
- default_namespace = constants.namespaces["html"]
-
attributes = node.get ('attributes', [])
convertedAttr = {}
for i in range (0, len (attributes), 2):
@@ -195,7 +199,6 @@ class StripAttributeFilter (Filter):
self.attributes = set (map (str.lower, attributes))
def __iter__(self):
- default_namespace = constants.namespaces["html"]
for token in Filter.__iter__(self):
data = token.get ('data')
if data and token['type'] in {'StartTag', 'EmptyTag'}: