diff options
Diffstat (limited to 'crocoite/html.py')
-rw-r--r-- | crocoite/html.py | 19 |
1 files changed, 11 insertions, 8 deletions
diff --git a/crocoite/html.py b/crocoite/html.py
index f891101..30f6ca5 100644
--- a/crocoite/html.py
+++ b/crocoite/html.py
@@ -22,6 +22,10 @@
 HTML helper
 """
 
+from html5lib.treewalkers.base import TreeWalker
+from html5lib.filters.base import Filter
+from html5lib import constants
+
 # HTML void tags, see https://html.spec.whatwg.org/multipage/syntax.html#void-elements
 voidTags = {'area',
         'base',
@@ -103,10 +107,7 @@ eventAttributes = {'onabort',
         'onvolumechange',
         'onwaiting'}
 
-from html5lib.treewalkers.base import TreeWalker
-from html5lib.filters.base import Filter
-from html5lib.serializer import HTMLSerializer
-from html5lib import constants
+default_namespace = constants.namespaces["html"]
 
 class ChromeTreeWalker (TreeWalker):
     """
@@ -123,11 +124,14 @@ class ChromeTreeWalker (TreeWalker):
             elif name == '#document':
                 for child in node.get ('children', []):
                     yield from self.recurse (child)
+            elif name == '#cdata-section':
+                # html5lib cannot generate cdata, so we’re faking it by using
+                # an empty tag
+                yield from self.emptyTag (default_namespace,
+                        '![CDATA[' + node['nodeValue'] + ']]', {})
             else:
-                assert False, name
+                assert False, (name, node)
         else:
-            default_namespace = constants.namespaces["html"]
-
             attributes = node.get ('attributes', [])
             convertedAttr = {}
             for i in range (0, len (attributes), 2):
@@ -195,7 +199,6 @@ class StripAttributeFilter (Filter):
         self.attributes = set (map (str.lower, attributes))
 
     def __iter__(self):
-        default_namespace = constants.namespaces["html"]
         for token in Filter.__iter__(self):
             data = token.get ('data')
             if data and token['type'] in {'StartTag', 'EmptyTag'}: