From 23b3fed7b44e4059901ea2d09c866d385fa05bfc Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun <lars@6xq.net>
Date: Sat, 15 Jun 2019 13:51:41 +0200
Subject: html: Fix CDATA walking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The “from” keyword was missing after “yield”, so the walker yielded a
generator instead of token dicts. Properly recreate CDATA elements now.
---
 crocoite/html.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'crocoite/html.py')

diff --git a/crocoite/html.py b/crocoite/html.py
index ae5b03c..30f6ca5 100644
--- a/crocoite/html.py
+++ b/crocoite/html.py
@@ -107,6 +107,8 @@ eventAttributes = {'onabort',
         'onvolumechange',
         'onwaiting'}
 
+default_namespace = constants.namespaces["html"]
+
 class ChromeTreeWalker (TreeWalker):
     """
     Recursive html5lib TreeWalker for Google Chrome method DOM.getDocument
@@ -123,14 +125,13 @@ class ChromeTreeWalker (TreeWalker):
                 for child in node.get ('children', []):
                     yield from self.recurse (child)
             elif name == '#cdata-section':
-                # html5lib cannot generate cdata. text should be fine. This
-                # only happens when using Chrome’s inline XML display.
-                yield self.text (node['nodeValue'])
+                # html5lib cannot generate cdata, so we’re faking it by using
+                # an empty tag
+                yield from self.emptyTag (default_namespace,
+                        '![CDATA[' + node['nodeValue'] + ']]', {})
             else:
                 assert False, (name, node)
         else:
-            default_namespace = constants.namespaces["html"]
-
             attributes = node.get ('attributes', [])
             convertedAttr = {}
             for i in range (0, len (attributes), 2):
-- 
cgit v1.2.3