summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2019-01-05 16:29:08 +0100
committer: Lars-Dominik Braun <lars@6xq.net>, 2019-01-05 16:29:08 +0100
commit: be5f9525649ac39fd9b72a0ad1e6442c72034834 (patch)
tree: bb36b2fc7e0c5c5bb2c9961f4cad2b20f85ca421
parent: 8c91e3df875319768b6699ccf7654ccb5a357c6d (diff)
download: crocoite-be5f9525649ac39fd9b72a0ad1e6442c72034834.tar.gz
crocoite-be5f9525649ac39fd9b72a0ad1e6442c72034834.tar.bz2
crocoite-be5f9525649ac39fd9b72a0ad1e6442c72034834.zip
html: Handle CDATA
When loading XML documents, Chrome presents a pretty-printed version to the user, but the tree exported via DOM.getDocument still contains the original XML (including CDATA sections). Not sure how to test this.
-rw-r--r-- crocoite/html.py 6
1 file changed, 5 insertions, 1 deletion
diff --git a/crocoite/html.py b/crocoite/html.py
index fec9760..ae5b03c 100644
--- a/crocoite/html.py
+++ b/crocoite/html.py
@@ -122,8 +122,12 @@ class ChromeTreeWalker (TreeWalker):
elif name == '#document':
for child in node.get ('children', []):
yield from self.recurse (child)
+ elif name == '#cdata-section':
+ # html5lib cannot generate cdata. text should be fine. This
+ # only happens when using Chrome’s inline XML display.
+ yield self.text (node['nodeValue'])
else:
- assert False, name
+ assert False, (name, node)
else:
default_namespace = constants.namespaces["html"]