diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2019-06-15 13:51:41 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2019-06-17 14:06:42 +0200 |
commit | 23b3fed7b44e4059901ea2d09c866d385fa05bfc (patch) | |
tree | dfabfc03ea3ac1376d8a5073f14f602973e30b29 /crocoite/html.py | |
parent | 158f55eb7fb24fa26727a008ad44964390171060 (diff) | |
download | crocoite-23b3fed7b44e4059901ea2d09c866d385fa05bfc.tar.gz crocoite-23b3fed7b44e4059901ea2d09c866d385fa05bfc.tar.bz2 crocoite-23b3fed7b44e4059901ea2d09c866d385fa05bfc.zip |
html: Fix CDATA walking
Missing “from” keyword, returned generator instead of dicts. Properly
recreate CDATA elements now.
Diffstat (limited to 'crocoite/html.py')
-rw-r--r-- | crocoite/html.py | 11 |
1 file changed, 6 insertions, 5 deletions
```diff
diff --git a/crocoite/html.py b/crocoite/html.py
index ae5b03c..30f6ca5 100644
--- a/crocoite/html.py
+++ b/crocoite/html.py
@@ -107,6 +107,8 @@ eventAttributes = {'onabort',
         'onvolumechange',
         'onwaiting'}
 
+default_namespace = constants.namespaces["html"]
+
 class ChromeTreeWalker (TreeWalker):
     """
     Recursive html5lib TreeWalker for Google Chrome method DOM.getDocument
@@ -123,14 +125,13 @@ class ChromeTreeWalker (TreeWalker):
                 for child in node.get ('children', []):
                     yield from self.recurse (child)
             elif name == '#cdata-section':
-                # html5lib cannot generate cdata. text should be fine. This
-                # only happens when using Chrome’s inline XML display.
-                yield self.text (node['nodeValue'])
+                # html5lib cannot generate cdata, so we’re faking it by using
+                # an empty tag
+                yield from self.emptyTag (default_namespace,
+                        '![CDATA[' + node['nodeValue'] + ']]', {})
             else:
                 assert False, (name, node)
         else:
-            default_namespace = constants.namespaces["html"]
-
             attributes = node.get ('attributes', [])
             convertedAttr = {}
             for i in range (0, len (attributes), 2):
```