From 23b3fed7b44e4059901ea2d09c866d385fa05bfc Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 15 Jun 2019 13:51:41 +0200 Subject: html: Fix CDATA walking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missing “from” keyword, returned generator instead of dicts. Properly recreate CDATA elements now. --- crocoite/test_html.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'crocoite/test_html.py') diff --git a/crocoite/test_html.py b/crocoite/test_html.py index c71697a..c17903b 100644 --- a/crocoite/test_html.py +++ b/crocoite/test_html.py @@ -18,9 +18,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. +import asyncio import pytest, html5lib from html5lib.serializer import HTMLSerializer from html5lib.treewalkers import getTreeWalker +from aiohttp import web from .html import StripTagFilter, StripAttributeFilter, ChromeTreeWalker from .test_devtools import tab, browser @@ -58,3 +60,37 @@ async def test_treewalker (tab): elif i == 1: assert result == framehtml +cdataDoc = '' +xmlHeader = '' +async def hello(request): + return web.Response(text=xmlHeader + cdataDoc, content_type='text/xml') + +@pytest.fixture +async def server (): + """ Simple HTTP server for testing notifications """ + app = web.Application() + app.add_routes([web.get('/test.xml', hello)]) + runner = web.AppRunner(app) + await runner.setup() + site = web.TCPSite(runner, 'localhost', 8080) + await site.start() + yield app + await runner.cleanup () + +@pytest.mark.asyncio +async def test_treewalker_cdata (tab, server): + ret = await tab.Page.navigate (url='http://localhost:8080/test.xml') + # wait until loaded XXX: replace with idle check + await asyncio.sleep (0.5) + dom = await tab.DOM.getDocument (depth=-1, pierce=True) + docs = list (ChromeTreeWalker (dom['root']).split ()) + assert len(docs) == 1 + for i, doc in enumerate (docs): + walker = ChromeTreeWalker (doc) + serializer = HTMLSerializer () + result = serializer.render (iter(walker)) + # chrome will display a pretty-printed viewer *plus* the original + # source (stripped of its xml header) + assert cdataDoc in result + + -- cgit v1.2.3