Skip to content

Commit ae51b0a

Browse files
author
James Graham
committed
Updates to match current spec including end tag handling for inForeign
1 parent 1221a95 commit ae51b0a

File tree

1 file changed

+70
-52
lines changed

1 file changed

+70
-52
lines changed

html5lib/html5parser.py

Lines changed: 70 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,12 @@ def wrapped(self, *args, **kwargs):
461461
return function(self, *args, **kwargs)
462462
return wrapped
463463

464+
def getMetaclass(use_metaclass, metaclass_func):
465+
if use_metaclass:
466+
return method_decorator_metaclass(metaclass_func)
467+
else:
468+
return type
469+
464470
class Phase(object):
465471
"""Base class for helper object that implements each phase of processing
466472
"""
@@ -475,8 +481,7 @@ class Phase(object):
475481
# * EndTag
476482
# - endTag* methods
477483

478-
if debug_log:
479-
__metaclass__ = method_decorator_metaclass(log)
484+
__metaclass__ = getMetaclass(debug_log, log)
480485

481486
def __init__(self, parser, tree):
482487
self.parser = parser
@@ -851,6 +856,9 @@ def processCharacters(self, token):
851856
self.anythingElse()
852857
self.parser.phase.processCharacters(token)
853858

859+
def startTagHtml(self, token):
860+
self.parser.phases["inBody"].processStartTag(token)
861+
854862
def startTagBody(self, token):
855863
self.parser.framesetOK = False
856864
self.tree.insertElement(token)
@@ -956,7 +964,7 @@ def __init__(self, parser, tree):
956964
(headingElements, self.endTagHeading),
957965
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
958966
"strike", "strong", "tt", "u"), self.endTagFormatting),
959-
(("applet", "button", "marquee", "object"), self.endTagAppletButtonMarqueeObject),
967+
(("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
960968
("br", self.endTagBr),
961969
])
962970
self.endTagHandler.default = self.endTagOther
@@ -1120,7 +1128,6 @@ def startTagButton(self, token):
11201128
else:
11211129
self.tree.reconstructActiveFormattingElements()
11221130
self.tree.insertElement(token)
1123-
self.tree.activeFormattingElements.append(Marker)
11241131
self.parser.framesetOK = False
11251132

11261133
def startTagAppletMarqueeObject(self, token):
@@ -1395,10 +1402,9 @@ def endTagFormatting(self, token):
13951402
# Step 1 paragraph 1
13961403
formattingElement = self.tree.elementInActiveFormattingElements(
13971404
token["name"])
1398-
if not formattingElement or (formattingElement in
1399-
self.tree.openElements and
1400-
not self.tree.elementInScope(
1401-
formattingElement.name)):
1405+
if (not formattingElement or
1406+
(formattingElement in self.tree.openElements and
1407+
not self.tree.elementInScope(formattingElement.name))):
14021408
self.parser.parseError("adoption-agency-1.1", {"name": token["name"]})
14031409
return
14041410

@@ -1421,7 +1427,6 @@ def endTagFormatting(self, token):
14211427
specialElements | scopingElements):
14221428
furthestBlock = element
14231429
break
1424-
14251430
# Step 3
14261431
if furthestBlock is None:
14271432
element = self.tree.openElements.pop()
@@ -1487,7 +1492,12 @@ def endTagFormatting(self, token):
14871492
# lastNode
14881493
if lastNode.parent:
14891494
lastNode.parent.removeChild(lastNode)
1490-
commonAncestor.appendChild(lastNode)
1495+
1496+
if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
1497+
parent, insertBefore = self.tree.getTableMisnestedNodePosition()
1498+
parent.insertBefore(lastNode, insertBefore)
1499+
else:
1500+
commonAncestor.appendChild(lastNode)
14911501

14921502
# Step 8
14931503
clone = formattingElement.cloneNode()
@@ -1507,7 +1517,7 @@ def endTagFormatting(self, token):
15071517
self.tree.openElements.insert(
15081518
self.tree.openElements.index(furthestBlock) + 1, clone)
15091519

1510-
def endTagAppletButtonMarqueeObject(self, token):
1520+
def endTagAppletMarqueeObject(self, token):
15111521
if self.tree.elementInScope(token["name"]):
15121522
self.tree.generateImpliedEndTags()
15131523
if self.tree.openElements[-1].name != token["name"]:
@@ -1532,7 +1542,6 @@ def endTagOther(self, token):
15321542
self.tree.generateImpliedEndTags(exclude=token["name"])
15331543
if self.tree.openElements[-1].name != token["name"]:
15341544
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
1535-
print self.tree.openElements
15361545
while self.tree.openElements.pop() != node:
15371546
pass
15381547
break
@@ -1957,6 +1966,7 @@ def startTagTableOther(self, token):
19571966
self.parser.phase.processStartTag(token)
19581967
else:
19591968
# innerHTML case
1969+
assert self.parser.innerHTML
19601970
self.parser.parseError()
19611971

19621972
def startTagOther(self, token):
@@ -1981,6 +1991,7 @@ def endTagTable(self, token):
19811991
self.parser.phase.processEndTag(token)
19821992
else:
19831993
# innerHTML case
1994+
assert self.parser.innerHTML
19841995
self.parser.parseError()
19851996

19861997
def endTagIgnore(self, token):
@@ -2072,6 +2083,7 @@ def endTagTableRowGroup(self, token):
20722083
self.parser.phase.processEndTag(token)
20732084
else:
20742085
# innerHTML case
2086+
assert self.parser.innerHTML
20752087
self.parser.parseError()
20762088

20772089
def endTagIgnore(self, token):
@@ -2120,6 +2132,7 @@ def startTagTableOther(self, token):
21202132
self.parser.phase.processStartTag(token)
21212133
else:
21222134
# innerHTML case
2135+
assert self.parser.innerHTML
21232136
self.parser.parseError()
21242137

21252138
def startTagOther(self, token):
@@ -2181,9 +2194,7 @@ def __init__(self, parser, tree):
21812194
self.endTagHandler = utils.MethodDispatcher([
21822195
("option", self.endTagOption),
21832196
("optgroup", self.endTagOptgroup),
2184-
("select", self.endTagSelect),
2185-
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td",
2186-
"th"), self.endTagTableElements)
2197+
("select", self.endTagSelect)
21872198
])
21882199
self.endTagHandler.default = self.endTagOther
21892200

@@ -2255,15 +2266,9 @@ def endTagSelect(self, token):
22552266
self.parser.resetInsertionMode()
22562267
else:
22572268
# innerHTML case
2269+
assert self.parser.innerHTML
22582270
self.parser.parseError()
22592271

2260-
def endTagTableElements(self, token):
2261-
self.parser.parseError("unexpected-end-tag-in-select",
2262-
{"name": token["name"]})
2263-
if self.tree.elementInScope(token["name"], variant="table"):
2264-
self.endTagSelect("select")
2265-
self.parser.phase.processEndTag(token)
2266-
22672272
def endTagOther(self, token):
22682273
self.parser.parseError("unexpected-end-tag-in-select",
22692274
{"name": token["name"]})
@@ -2322,16 +2327,13 @@ def __init__(self, parser, tree):
23222327
Phase.__init__(self, parser, tree)
23232328

23242329
def nonHTMLElementInScope(self):
2330+
rv = False
23252331
for element in self.tree.openElements[::-1]:
2326-
if element.namespace == self.tree.defaultNamespace:
2327-
return self.tree.elementInScope(element)
2328-
assert False
2329-
for item in self.tree.openElements[::-1]:
2330-
if item.namespace == self.tree.defaultNamespace:
2331-
return True
2332-
elif item.nameTuple in scopingElements:
2333-
return False
2334-
return False
2332+
if element.namespace != self.tree.defaultNamespace:
2333+
rv = self.tree.elementInScope(element.name)
2334+
if rv:
2335+
break
2336+
return rv
23352337

23362338
def adjustSVGTagNames(self, token):
23372339
replacements = {"altglyph":"altGlyph",
@@ -2383,21 +2385,23 @@ def processEOF(self):
23832385

23842386
def processStartTag(self, token):
23852387
currentNode = self.tree.openElements[-1]
2386-
if (currentNode.namespace == self.tree.defaultNamespace or
2387-
(currentNode.namespace == namespaces["mathml"] and
2388+
currentNodeNamespace = currentNode.namespace
2389+
currentNodeName = currentNode.name
2390+
if (currentNodeNamespace == self.tree.defaultNamespace or
2391+
(currentNodeNamespace == namespaces["mathml"] and
23882392
token["name"] not in frozenset(["mglyph", "malignmark"]) and
2389-
currentNode.name in frozenset(["mi", "mo", "mn",
2393+
currentNodeName in frozenset(["mi", "mo", "mn",
23902394
"ms", "mtext"])) or
2391-
(currentNode.namespace == namespaces["mathml"] and
2392-
currentNode.name == "annotation-xml" and
2395+
(currentNodeNamespace == namespaces["mathml"] and
2396+
currentNodeName == "annotation-xml" and
23932397
token["name"] == "svg") or
23942398
(currentNode.namespace == namespaces["svg"] and
23952399
currentNode.name in frozenset(["foreignObject",
23962400
"desc", "title"])
23972401
)):
23982402
assert self.parser.secondaryPhase != self
23992403
self.parser.secondaryPhase.processStartTag(token)
2400-
if self.parser.phase == self and self.nonHTMLElementInScope():
2404+
if self.parser.phase == self and not self.nonHTMLElementInScope():
24012405
self.parser.phase = self.parser.secondaryPhase
24022406
elif token["name"] in self.breakoutElements:
24032407
self.parser.parseError("unexpected-html-element-in-foreign-content",
@@ -2421,10 +2425,34 @@ def processStartTag(self, token):
24212425
token["selfClosingAcknowledged"] = True
24222426

24232427
def processEndTag(self, token):
2424-
self.adjustSVGTagNames(token)
2425-
self.parser.secondaryPhase.processEndTag(token)
2426-
if self.parser.phase == self and self.nonHTMLElementInScope():
2427-
self.parser.phase = self.parser.secondaryPhase
2428+
if self.tree.openElements[-1].namespace != self.tree.defaultNamespace:
2429+
nodeIndex = len(self.tree.openElements) - 1
2430+
node = self.tree.openElements[-1]
2431+
if node.name != token["name"]:
2432+
self.parser.parseError("unexpected-end-tag", token["name"])
2433+
2434+
while True:
2435+
if node.name == token["name"]:
2436+
popped = self.tree.openElements.pop()
2437+
while popped != node:
2438+
popped = self.tree.openElements.pop()
2439+
assert self.tree.openElements
2440+
break
2441+
nodeIndex -= 1
2442+
2443+
node = self.tree.openElements[nodeIndex]
2444+
if node.namespace == self.tree.defaultNamespace:
2445+
assert self.parser.secondaryPhase != self
2446+
self.parser.secondaryPhase.processEndTag(token)
2447+
if self.parser.phase == self and not self.nonHTMLElementInScope():
2448+
self.parser.phase = self.parser.secondaryPhase
2449+
break
2450+
2451+
else:
2452+
assert self.parser.secondaryPhase != self
2453+
self.parser.secondaryPhase.processEndTag(token)
2454+
if self.parser.phase == self and not self.nonHTMLElementInScope():
2455+
self.parser.phase = self.parser.secondaryPhase
24282456

24292457
class AfterBodyPhase(Phase):
24302458
def __init__(self, parser, tree):
@@ -2487,8 +2515,7 @@ def __init__(self, parser, tree):
24872515
self.startTagHandler.default = self.startTagOther
24882516

24892517
self.endTagHandler = utils.MethodDispatcher([
2490-
("frameset", self.endTagFrameset),
2491-
("noframes", self.endTagNoframes)
2518+
("frameset", self.endTagFrameset)
24922519
])
24932520
self.endTagHandler.default = self.endTagOther
24942521

@@ -2527,9 +2554,6 @@ def endTagFrameset(self, token):
25272554
# "frameset" element (anymore) then switch.
25282555
self.parser.phase = self.parser.phases["afterFrameset"]
25292556

2530-
def endTagNoframes(self, token):
2531-
self.parser.phases["inBody"].processEndTag(token)
2532-
25332557
def endTagOther(self, token):
25342558
self.parser.parseError("unexpected-end-tag-in-frameset",
25352559
{"name": token["name"]})
@@ -2632,8 +2656,6 @@ def processSpaceCharacters(self, token):
26322656

26332657
def processCharacters(self, token):
26342658
self.parser.parseError("expected-eof-but-got-char")
2635-
self.parser.phase = self.parser.phases["inBody"]
2636-
self.parser.phase.processCharacters(token)
26372659

26382660
def startTagHtml(self, token):
26392661
self.parser.phases["inBody"].processStartTag(token)
@@ -2644,14 +2666,10 @@ def startTagNoFrames(self, token):
26442666
def startTagOther(self, token):
26452667
self.parser.parseError("expected-eof-but-got-start-tag",
26462668
{"name": token["name"]})
2647-
self.parser.phase = self.parser.phases["inBody"]
2648-
self.parser.phase.processStartTag(token)
26492669

26502670
def processEndTag(self, token):
26512671
self.parser.parseError("expected-eof-but-got-end-tag",
26522672
{"name": token["name"]})
2653-
self.parser.phase = self.parser.phases["inBody"]
2654-
self.parser.phase.processEndTag(token)
26552673

26562674
def impliedTagToken(name, type="EndTag", attributes = None,
26572675
selfClosing = False):

0 commit comments

Comments
 (0)