summary | refs | log | tree | commit | diff
path: root/crocoite/cli.py
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2017-11-26 10:23:34 +0100
committer: Lars-Dominik Braun <lars@6xq.net>, 2017-11-26 10:25:13 +0100
commit: 6b1d593ec841ebe18dcbdd18902f7faad0868fd4 (patch)
tree: f437aa9275f93492d5a860fb1790cb59d2d2e5a5 /crocoite/cli.py
parent: ee736e28ed4c5cdae395f5851eb2bad96d8078ef (diff)
download: crocoite-6b1d593ec841ebe18dcbdd18902f7faad0868fd4.tar.gz
crocoite-6b1d593ec841ebe18dcbdd18902f7faad0868fd4.tar.bz2
crocoite-6b1d593ec841ebe18dcbdd18902f7faad0868fd4.zip
DOM snapshot: Generate valid HTML5
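Some elements are “void”, i.e. they cannot contain any content and do not have a closing tag. Void elements without children are now emitted as a single empty tag instead of a start/end tag pair.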
Diffstat (limited to 'crocoite/cli.py')
-rw-r--r-- crocoite/cli.py 19
1 files changed, 12 insertions, 7 deletions
diff --git a/crocoite/cli.py b/crocoite/cli.py
index a2ac958..640d207 100644
--- a/crocoite/cli.py
+++ b/crocoite/cli.py
@@ -107,14 +107,20 @@ class ChromeTreeWalker (TreeWalker):
assert False, name
else:
default_namespace = constants.namespaces["html"]
+
attributes = node.get ('attributes', [])
convertedAttr = {}
for i in range (0, len (attributes), 2):
convertedAttr[(default_namespace, attributes[i])] = attributes[i+1]
- yield self.startTag (default_namespace, name, convertedAttr)
- for child in node.get ('children', []):
- yield from self.recurse (child)
- yield self.endTag ('', name)
+
+ children = node.get ('children', [])
+ if name.lower() in html.voidTags and not children:
+ yield from self.emptyTag (default_namespace, name, convertedAttr)
+ else:
+ yield self.startTag (default_namespace, name, convertedAttr)
+ for child in node.get ('children', []):
+ yield from self.recurse (child)
+ yield self.endTag ('', name)
def __iter__ (self):
assert self.tree['nodeName'] == '#document'
@@ -151,7 +157,7 @@ class StripTagFilter (Filter):
delete = 0
for token in Filter.__iter__(self):
tokenType = token['type']
- if tokenType == 'StartTag':
+ if tokenType in {'StartTag', 'EmptyTag'}:
if delete > 0 or token['name'].lower () in self.tags:
delete += 1
if delete == 0:
@@ -172,8 +178,7 @@ class StripAttributeFilter (Filter):
default_namespace = constants.namespaces["html"]
for token in Filter.__iter__(self):
data = token.get ('data')
- # XXX: Handle EmptyTag
- if data and token['type'] == 'StartTag':
+ if data and token['type'] in {'StartTag', 'EmptyTag'}:
newdata = {}
for (namespace, k), v in data.items ():
if k.lower () not in self.attributes: