summary | refs | log | tree | commit | diff
path: root/crocoite/test_html.py
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2019-06-15 13:51:41 +0200
committer: Lars-Dominik Braun <lars@6xq.net>, 2019-06-17 14:06:42 +0200
commit: 23b3fed7b44e4059901ea2d09c866d385fa05bfc (patch)
tree: dfabfc03ea3ac1376d8a5073f14f602973e30b29 /crocoite/test_html.py
parent: 158f55eb7fb24fa26727a008ad44964390171060 (diff)
download: crocoite-23b3fed7b44e4059901ea2d09c866d385fa05bfc.tar.gz
crocoite-23b3fed7b44e4059901ea2d09c866d385fa05bfc.tar.bz2
crocoite-23b3fed7b44e4059901ea2d09c866d385fa05bfc.zip
html: Fix CDATA walking
Missing “from” keyword, returned generator instead of dicts. Properly recreate CDATA elements now.
Diffstat (limited to 'crocoite/test_html.py')
-rw-r--r-- crocoite/test_html.py 36
1 file changed, 36 insertions, 0 deletions
diff --git a/crocoite/test_html.py b/crocoite/test_html.py
index c71697a..c17903b 100644
--- a/crocoite/test_html.py
+++ b/crocoite/test_html.py
@@ -18,9 +18,11 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
+import asyncio
import pytest, html5lib
from html5lib.serializer import HTMLSerializer
from html5lib.treewalkers import getTreeWalker
+from aiohttp import web
from .html import StripTagFilter, StripAttributeFilter, ChromeTreeWalker
from .test_devtools import tab, browser
@@ -58,3 +60,37 @@ async def test_treewalker (tab):
elif i == 1:
assert result == framehtml
+cdataDoc = '<test><![CDATA[Hello world]]></test>'
+xmlHeader = '<?xml version="1.0" encoding="UTF-8"?>'
+async def hello(request):
+ return web.Response(text=xmlHeader + cdataDoc, content_type='text/xml')
+
+@pytest.fixture
+async def server ():
+ """ Simple HTTP server for testing notifications """
+ app = web.Application()
+ app.add_routes([web.get('/test.xml', hello)])
+ runner = web.AppRunner(app)
+ await runner.setup()
+ site = web.TCPSite(runner, 'localhost', 8080)
+ await site.start()
+ yield app
+ await runner.cleanup ()
+
+@pytest.mark.asyncio
+async def test_treewalker_cdata (tab, server):
+ ret = await tab.Page.navigate (url='http://localhost:8080/test.xml')
+ # wait until loaded XXX: replace with idle check
+ await asyncio.sleep (0.5)
+ dom = await tab.DOM.getDocument (depth=-1, pierce=True)
+ docs = list (ChromeTreeWalker (dom['root']).split ())
+ assert len(docs) == 1
+ for i, doc in enumerate (docs):
+ walker = ChromeTreeWalker (doc)
+ serializer = HTMLSerializer ()
+ result = serializer.render (iter(walker))
+ # chrome will display a pretty-printed viewer *plus* the original
+ # source (stripped of its xml header)
+ assert cdataDoc in result
+
+