1919 from sets import Set as set
2020 from sets import ImmutableSet as frozenset
2121import _base
22- from html5lib .constants import E
22+ from html5lib .constants import E , spaceCharacters
2323from html5lib import tokenizer
2424import gettext
2525_ = gettext .gettext
3737 _ (u"'%(attributeName)s' attribute is not allowed on <input type=%(inputType)s>." ),
3838 "deprecated-attribute" :
3939 _ (u"'%(attributeName)s' attribute is deprecated on <%(tagName)s>." ),
40+ "invalid-class-attribute" :
41+ _ (u"Invalid class attribute value on <%(tagName)s>." ),
42+ "duplicate-value-in-token-list" :
43+ _ (u"Duplicate value '%(attributeValue)s' in token list in '%(attributeName)s' attribute on <%(tagName)s>." ),
4044})
4145
4246globalAttributes = frozenset (('class' , 'contenteditable' , 'contextmenu' , 'dir' ,
@@ -225,18 +229,35 @@ def __iter__(self):
225229 for t in method (token ) or []: yield t
226230 yield token
227231
def checkAttributeValues(self, token):
    """Run the attribute-value validators that apply to a start tag token.

    For every (name, value) pair in the token's "data" list, a handler is
    looked up on self: first the tag-specific
    ``validateAttributeValue<Tag><Attr>``, then the generic
    ``validateAttributeValue<Attr>``.  Both name parts come from
    ``str.capitalize()``, e.g. ``validateAttributeValueInputClass``.

    The handler is called with (token, tagName, attrName, attrValue), where
    tagName is as found in the token and attrName is lowercased.  Everything
    it produces is yielded; a handler returning None contributes nothing.
    """
    tagName = token.get("name", "")
    nameParts = {"tagName": tagName.capitalize()}
    for attrName, attrValue in token.get("data", []):
        attrName = attrName.lower()
        nameParts["attributeName"] = attrName.capitalize()
        # Prefer the tag-specific validator; fall back to the generic one.
        handler = (
            getattr(self, "validateAttributeValue%(tagName)s%(attributeName)s" % nameParts, None)
            or getattr(self, "validateAttributeValue%(attributeName)s" % nameParts, None))
        if not handler:
            continue
        for error in handler(token, tagName, attrName, attrValue) or []:
            yield error
245+
def validateStartTag(self, token):
    """Validate a generic start tag token, yielding every error found.

    Runs, in order: the unknown-tag check, the required-attributes check,
    the unknown-attributes check and the per-attribute value checks.
    """
    checks = (
        self.checkUnknownStartTag,
        self.checkStartTagRequiredAttributes,
        self.checkStartTagUnknownAttributes,
        self.checkAttributeValues,
    )
    for check in checks:
        # A check may return None instead of an iterable.
        for error in check(token) or []:
            yield error
232251
def validateStartTagEmbed(self, token):
    """Validate an <embed> start tag token, yielding every error found.

    Only the required-attributes check and the attribute-value checks run.
    """
    # spec says "any attributes w/o namespace"
    # so don't call checkStartTagUnknownAttributes
    checks = (
        self.checkStartTagRequiredAttributes,
        self.checkAttributeValues,
    )
    for check in checks:
        # A check may return None instead of an iterable.
        for error in check(token) or []:
            yield error
237257
238258 def validateStartTagInput (self , token ):
239- attrDict = dict ([(name .lower (), value ) for name , value in token ["data" ]])
259+ for t in self .checkAttributeValues (token ) or []: yield t
260+ attrDict = dict ([(name .lower (), value ) for name , value in token .get ("data" , [])])
240261 inputType = attrDict .get ("type" , "text" )
241262 if inputType not in inputTypeAllowedAttributeMap .keys ():
242263 yield {"type" : "ParseError" ,
@@ -262,18 +283,18 @@ def validateStartTagInput(self, token):
262283
def checkUnknownStartTag(self, token):
    """Yield an "unknown-start-tag" ParseError for an unrecognized tag name.

    The tag name is read from token["name"] (empty string when absent) and
    lowercased before being looked up in allowedAttributeMap.  The error's
    "datavars" carries that lowercased name as "tagName".
    """
    # check for recognized tag name
    name = token.get("name", "").lower()
    # Test the mapping itself: going through .keys() builds a list and
    # scans it linearly on every call under Python 2.
    if name not in allowedAttributeMap:
        yield {"type": "ParseError",
               "data": "unknown-start-tag",
               "datavars": {"tagName": name}}
270291
271292 def checkStartTagRequiredAttributes (self , token ):
272293 # check for presence of required attributes
273- name = token [ "name" ] .lower ()
294+ name = token . get ( "name" , "" ) .lower ()
274295 if name in requiredAttributeMap .keys ():
275296 attrsPresent = [attrName for attrName , attrValue
276- in token [ "data" ] ]
297+ in token . get ( "data" , []) ]
277298 for attrName in requiredAttributeMap [name ]:
278299 if attrName not in attrsPresent :
279300 yield {"type" : "ParseError" ,
@@ -283,12 +304,39 @@ def checkStartTagRequiredAttributes(self, token):
283304
def checkStartTagUnknownAttributes(self, token):
    """Yield an "unknown-attribute" ParseError for each unrecognized attribute.

    An attribute is recognized when its lowercased name is either a global
    attribute or listed for this tag in allowedAttributeMap.  Tags missing
    from allowedAttributeMap are only allowed the global attributes.

    The error's "datavars" carries the lowercased tag name and the attribute
    name exactly as it appeared in the token.
    """
    # check for recognized attribute names
    # Default to "" like the sibling checks do, so a token without a "name"
    # does not fail with AttributeError on None.
    name = token.get("name", "").lower()
    allowedAttributes = globalAttributes | allowedAttributeMap.get(name, frozenset(()))
    for attrName, attrValue in token.get("data", []):
        if attrName.lower() not in allowedAttributes:
            yield {"type": "ParseError",
                   "data": "unknown-attribute",
                   "datavars": {"tagName": name,
                                "attributeName": attrName}}
294315
def validateAttributeValueClass(self, token, tagName, attrName, attrValue):
    """Validate the value of a class attribute as a token list.

    Each error reported by checkTokenList is passed through, immediately
    followed by an "invalid-class-attribute" ParseError for the tag.  A
    value with no token-list errors yields nothing.
    """
    # NOTE(review): the follow-up error is emitted once per token-list
    # error rather than unconditionally -- confirm this is intended.
    listErrors = self.checkTokenList(tagName, attrName, attrValue) or []
    for listError in listErrors:
        yield listError
        yield {"type": "ParseError",
               "data": "invalid-class-attribute",
               "datavars": {"tagName": tagName}}
322+
def checkTokenList(self, tagName, attrName, attrValue):
    """Yield a ParseError for each repeated value in a space-separated list.

    "Token" here means one item of a space-separated attribute value, i.e.
    http://www.whatwg.org/specs/web-apps/current-work/#set-of
    and not a token produced by HTMLTokenizer.

    attrValue is split on the characters in spaceCharacters.  Each second or
    later occurrence of a value yields one "duplicate-value-in-token-list"
    error whose "datavars" carry tagName, attrName and the repeated value.
    Runs of separators and leading/trailing separators produce no values.
    """
    # A set keeps the duplicate test constant-time per value; a list would
    # make long token lists quadratic.
    seenValues = set()
    currentChars = []
    # The appended space is a sentinel that flushes the final value.
    for c in attrValue + ' ':
        if c not in spaceCharacters:
            currentChars.append(c)
            continue
        if not currentChars:
            # Consecutive or leading separators: nothing to flush.
            continue
        currentValue = ''.join(currentChars)
        currentChars = []
        if currentValue in seenValues:
            yield {"type": "ParseError",
                   "data": "duplicate-value-in-token-list",
                   "datavars": {"tagName": tagName,
                                "attributeName": attrName,
                                "attributeValue": currentValue}}
        seenValues.add(currentValue)
0 commit comments