Skip to content

Commit e5cea2d

Browse files
author
Mark Pilgrim
committed
added support for validating class attribute values (space-separated token lists)
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%40973
1 parent 0cdaa3b commit e5cea2d

File tree

1 file changed

+55
-7
lines changed

1 file changed

+55
-7
lines changed

src/html5lib/filters/validator.py

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from sets import Set as set
2020
from sets import ImmutableSet as frozenset
2121
import _base
22-
from html5lib.constants import E
22+
from html5lib.constants import E, spaceCharacters
2323
from html5lib import tokenizer
2424
import gettext
2525
_ = gettext.gettext
@@ -37,6 +37,10 @@
3737
_(u"'%(attributeName)s' attribute is not allowed on <input type=%(inputType)s>."),
3838
"deprecated-attribute":
3939
_(u"'%(attributeName)s' attribute is deprecated on <%(tagName)s>."),
40+
"invalid-class-attribute":
41+
_(u"Invalid class attribute value on <%(tagName)s>."),
42+
"duplicate-value-in-token-list":
43+
_(u"Duplicate value '%(attributeValue)s' in token list in '%(attributeName)s' attribute on <%(tagName)s>."),
4044
})
4145

4246
globalAttributes = frozenset(('class', 'contenteditable', 'contextmenu', 'dir',
@@ -225,18 +229,35 @@ def __iter__(self):
225229
for t in method(token) or []: yield t
226230
yield token
227231

232+
def checkAttributeValues(self, token):
    """Dispatch each attribute of *token* to a matching value validator.

    For every (name, value) pair in token["data"], a method named
    validateAttributeValue<Tag><Attr> is looked up first and
    validateAttributeValue<Attr> second, where <Tag> and <Attr> are the
    capitalized tag name and lowercased-then-capitalized attribute name.
    The first one found is called with (token, tagName, attrName,
    attrValue) and everything it produces is re-yielded.  Attributes
    with no matching validator are skipped silently.
    """
    tagName = token.get("name", "")
    tagPart = tagName.capitalize()
    for rawName, attrValue in token.get("data", []):
        attrName = rawName.lower()
        attrPart = attrName.capitalize()
        # An element-specific validator takes precedence over the
        # attribute-wide one.
        validator = getattr(self, "validateAttributeValue" + tagPart + attrPart, None)
        if not validator:
            validator = getattr(self, "validateAttributeValue" + attrPart, None)
        if not validator:
            continue
        for t in validator(token, tagName, attrName, attrValue) or []:
            yield t
245+
228246
def validateStartTag(self, token):
    """Run the generic start-tag checks on *token* and yield their errors.

    The checks run in a fixed order: unknown tag name, required
    attributes, unknown attribute names, then attribute values.  A check
    that returns None contributes nothing.
    """
    checks = (
        self.checkUnknownStartTag,
        self.checkStartTagRequiredAttributes,
        self.checkStartTagUnknownAttributes,
        self.checkAttributeValues,
    )
    for check in checks:
        for t in check(token) or []:
            yield t
232251

233252
def validateStartTagEmbed(self, token):
    """Validate an <embed> start tag: required attributes, then values.

    Errors from the required-attribute check are yielded first, followed
    by errors from the attribute-value check.
    """
    # spec says "any attributes w/o namespace"
    # so don't call checkStartTagUnknownAttributes
    checks = (
        self.checkStartTagRequiredAttributes,
        self.checkAttributeValues,
    )
    for check in checks:
        for t in check(token) or []:
            yield t
237257

238258
def validateStartTagInput(self, token):
239-
attrDict = dict([(name.lower(), value) for name, value in token["data"]])
259+
for t in self.checkAttributeValues(token) or []: yield t
260+
attrDict = dict([(name.lower(), value) for name, value in token.get("data", [])])
240261
inputType = attrDict.get("type", "text")
241262
if inputType not in inputTypeAllowedAttributeMap.keys():
242263
yield {"type": "ParseError",
@@ -262,18 +283,18 @@ def validateStartTagInput(self, token):
262283

263284
def checkUnknownStartTag(self, token):
    """Yield an "unknown-start-tag" ParseError for an unrecognized element.

    The tag name is lowercased before the lookup; a token without a
    "name" key is treated as having the empty name.  The lowercased name
    is reported in the error's datavars.
    """
    # check for recognized tag name
    name = token.get("name", "").lower()
    # Test membership on the mapping itself: on Python 2, .keys() builds
    # a throwaway list that is then scanned linearly.
    # (assumes allowedAttributeMap is a dict-like mapping, as its use
    # with .keys()/.get() elsewhere in this file suggests)
    if name not in allowedAttributeMap:
        yield {"type": "ParseError",
               "data": "unknown-start-tag",
               "datavars": {"tagName": name}}
270291

271292
def checkStartTagRequiredAttributes(self, token):
272293
# check for presence of required attributes
273-
name = token["name"].lower()
294+
name = token.get("name", "").lower()
274295
if name in requiredAttributeMap.keys():
275296
attrsPresent = [attrName for attrName, attrValue
276-
in token["data"]]
297+
in token.get("data", [])]
277298
for attrName in requiredAttributeMap[name]:
278299
if attrName not in attrsPresent:
279300
yield {"type": "ParseError",
@@ -283,12 +304,39 @@ def checkStartTagRequiredAttributes(self, token):
283304

284305
def checkStartTagUnknownAttributes(self, token):
    """Yield an "unknown-attribute" ParseError per unrecognized attribute.

    An attribute is recognized when its lowercased name is either a
    global attribute or listed for the (lowercased) tag name in
    allowedAttributeMap.  The error reports the lowercased tag name and
    the attribute name exactly as it appeared in the token.
    """
    # check for recognized attribute names
    # Default to "" like the sibling checks do, so a token without a
    # "name" key does not fail with AttributeError on None.lower().
    name = token.get("name", "").lower()
    allowedAttributes = globalAttributes | allowedAttributeMap.get(name, frozenset(()))
    for attrName, attrValue in token.get("data", []):
        if attrName.lower() not in allowedAttributes:
            yield {"type": "ParseError",
                   "data": "unknown-attribute",
                   "datavars": {"tagName": name,
                                "attributeName": attrName}}
294315

316+
def validateAttributeValueClass(self, token, tagName, attrName, attrValue):
    """Validate a class attribute value as a space-separated token list.

    Each error produced by checkTokenList is passed through, and each is
    followed by an "invalid-class-attribute" ParseError naming the tag.
    """
    # NOTE(review): the nesting here was reconstructed from flattened
    # text.  The class-level error is assumed to accompany each
    # token-list error rather than being emitted unconditionally --
    # confirm against the repository copy.
    problems = self.checkTokenList(tagName, attrName, attrValue) or []
    for problem in problems:
        yield problem
        yield {"type": "ParseError",
               "data": "invalid-class-attribute",
               "datavars": {"tagName": tagName}}
322+
323+
def checkTokenList(self, tagName, attrName, attrValue):
    """Yield a ParseError for each repeated value in a space-separated list.

    The "token" in the method name refers to tokens in an attribute value
    i.e. http://www.whatwg.org/specs/web-apps/current-work/#set-of
    and not to the tokens generated by HTMLTokenizer.

    attrValue is split on the characters in spaceCharacters; empty runs
    between separators are ignored.  A "duplicate-value-in-token-list"
    error, carrying tagName, attrName and the repeated value, is yielded
    for every occurrence of a value after its first.
    """
    # A set gives constant-time duplicate detection; the earlier list
    # was rescanned for every value.
    seenValues = set()
    currentChars = []
    # The appended space flushes the final value through the same branch
    # as every other separator.
    for c in attrValue + ' ':
        if c not in spaceCharacters:
            currentChars.append(c)
            continue
        if not currentChars:
            # consecutive (or leading) separators: nothing to flush
            continue
        currentValue = ''.join(currentChars)
        currentChars = []
        if currentValue in seenValues:
            yield {"type": "ParseError",
                   "data": "duplicate-value-in-token-list",
                   "datavars": {"tagName": tagName,
                                "attributeName": attrName,
                                "attributeValue": currentValue}}
        seenValues.add(currentValue)

0 commit comments

Comments
 (0)