diff options
| author | Lars-Dominik Braun <lars@6xq.net> | 2019-01-05 16:29:08 +0100 | 
|---|---|---|
| committer | Lars-Dominik Braun <lars@6xq.net> | 2019-01-05 16:29:08 +0100 | 
| commit | be5f9525649ac39fd9b72a0ad1e6442c72034834 (patch) | |
| tree | bb36b2fc7e0c5c5bb2c9961f4cad2b20f85ca421 /crocoite | |
| parent | 8c91e3df875319768b6699ccf7654ccb5a357c6d (diff) | |
| download | crocoite-be5f9525649ac39fd9b72a0ad1e6442c72034834.tar.gz crocoite-be5f9525649ac39fd9b72a0ad1e6442c72034834.tar.bz2 crocoite-be5f9525649ac39fd9b72a0ad1e6442c72034834.zip | |
html: Handle CDATA
When loading XML documents, Chrome presents a pretty-printed version to
the user, but the document exported via DOM.getDocument still contains
the original XML.
Not sure how to test this.
Diffstat (limited to 'crocoite')
| -rw-r--r-- | crocoite/html.py | 6 | 
1 files changed, 5 insertions, 1 deletions
```diff
diff --git a/crocoite/html.py b/crocoite/html.py
index fec9760..ae5b03c 100644
--- a/crocoite/html.py
+++ b/crocoite/html.py
@@ -122,8 +122,12 @@ class ChromeTreeWalker (TreeWalker):
             elif name == '#document':
                 for child in node.get ('children', []):
                     yield from self.recurse (child)
+            elif name == '#cdata-section':
+                # html5lib cannot generate cdata. text should be fine. This
+                # only happens when using Chrome’s inline XML display.
+                yield self.text (node['nodeValue'])
             else:
-                assert False, name
+                assert False, (name, node)
         else:
             default_namespace = constants.namespaces["html"]
```
