From be5f9525649ac39fd9b72a0ad1e6442c72034834 Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun <lars@6xq.net>
Date: Sat, 5 Jan 2019 16:29:08 +0100
Subject: html: Handle CDATA

When loading XML documents, Chrome presents a pretty-printed version to
the user. That version still contains the original XML, including CDATA
sections, when it is exported via DOM.getDocument.

Not sure how to test this.
---
 crocoite/html.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'crocoite')

diff --git a/crocoite/html.py b/crocoite/html.py
index fec9760..ae5b03c 100644
--- a/crocoite/html.py
+++ b/crocoite/html.py
@@ -122,8 +122,12 @@ class ChromeTreeWalker (TreeWalker):
             elif name == '#document':
                 for child in node.get ('children', []):
                     yield from self.recurse (child)
+            elif name == '#cdata-section':
+                # html5lib cannot generate cdata. text should be fine. This
+                # only happens when using Chrome’s inline XML display.
+                yield self.text (node['nodeValue'])
             else:
-                assert False, name
+                assert False, (name, node)
         else:
             default_namespace = constants.namespaces["html"]
 
-- 
cgit v1.2.3