19
19
from sets import Set as set
20
20
from sets import ImmutableSet as frozenset
21
21
import _base
22
- from html5lib .constants import E
22
+ from html5lib .constants import E , spaceCharacters
23
23
from html5lib import tokenizer
24
24
import gettext
25
25
_ = gettext .gettext
37
37
_ (u"'%(attributeName)s' attribute is not allowed on <input type=%(inputType)s>." ),
38
38
"deprecated-attribute" :
39
39
_ (u"'%(attributeName)s' attribute is deprecated on <%(tagName)s>." ),
40
+ "invalid-class-attribute" :
41
+ _ (u"Invalid class attribute value on <%(tagName)s>." ),
42
+ "duplicate-value-in-token-list" :
43
+ _ (u"Duplicate value '%(attributeValue)s' in token list in '%(attributeName)s' attribute on <%(tagName)s>." ),
40
44
})
41
45
42
46
globalAttributes = frozenset (('class' , 'contenteditable' , 'contextmenu' , 'dir' ,
@@ -225,18 +229,35 @@ def __iter__(self):
225
229
for t in method (token ) or []: yield t
226
230
yield token
227
231
232
def checkAttributeValues(self, token):
    """Run the attribute-value validators that apply to a token.

    For every (name, value) pair in token["data"] a validator method is
    looked up on self by name: first the tag-specific
    validateAttributeValue<Tag><Attribute>, then the generic
    validateAttributeValue<Attribute>.  The attribute name is lower-cased
    first, and both name parts go through str.capitalize().  Everything
    the selected validator produces is yielded; attributes without a
    matching validator are skipped.
    """
    tagName = token.get("name", "")
    tagPart = tagName.capitalize()
    for attrName, attrValue in token.get("data", []):
        attrName = attrName.lower()
        attrPart = attrName.capitalize()
        # The tag-specific validator takes precedence over the generic one.
        validator = (
            getattr(self, "validateAttributeValue" + tagPart + attrPart, None)
            or getattr(self, "validateAttributeValue" + attrPart, None))
        if not validator:
            continue
        for t in validator(token, tagName, attrName, attrValue) or []:
            yield t
245
+
228
246
def validateStartTag(self, token):
    """Yield the results of every generic start-tag check, in order.

    The unknown-tag, required-attribute, unknown-attribute and
    attribute-value checks are each run against token, and whatever they
    produce is passed through unchanged.
    """
    checks = (
        self.checkUnknownStartTag,
        self.checkStartTagRequiredAttributes,
        self.checkStartTagUnknownAttributes,
        self.checkAttributeValues,
    )
    for check in checks:
        # A check may return None instead of an iterable.
        for t in check(token) or []:
            yield t
232
251
233
252
def validateStartTagEmbed(self, token):
    """Yield the required-attribute and attribute-value check results."""
    # The spec says "any attributes w/o namespace" are allowed, so
    # checkStartTagUnknownAttributes is deliberately not called here.
    for check in (self.checkStartTagRequiredAttributes,
                  self.checkAttributeValues):
        for t in check(token) or []:
            yield t
237
257
238
258
def validateStartTagInput (self , token ):
239
- attrDict = dict ([(name .lower (), value ) for name , value in token ["data" ]])
259
+ for t in self .checkAttributeValues (token ) or []: yield t
260
+ attrDict = dict ([(name .lower (), value ) for name , value in token .get ("data" , [])])
240
261
inputType = attrDict .get ("type" , "text" )
241
262
if inputType not in inputTypeAllowedAttributeMap .keys ():
242
263
yield {"type" : "ParseError" ,
@@ -262,18 +283,18 @@ def validateStartTagInput(self, token):
262
283
263
284
def checkUnknownStartTag(self, token):
    """Yield an "unknown-start-tag" ParseError for unrecognised tag names.

    The tag name is lower-cased and looked up in allowedAttributeMap.  A
    token without a "name" entry is treated as having an empty name.
    """
    name = token.get("name", "").lower()
    # Test membership on the mapping itself: under Python 2, .keys() would
    # build a fresh list and scan it linearly on every call.
    if name not in allowedAttributeMap:
        yield {"type": "ParseError",
               "data": "unknown-start-tag",
               "datavars": {"tagName": name}}
270
291
271
292
def checkStartTagRequiredAttributes (self , token ):
272
293
# check for presence of required attributes
273
- name = token [ "name" ] .lower ()
294
+ name = token . get ( "name" , "" ) .lower ()
274
295
if name in requiredAttributeMap .keys ():
275
296
attrsPresent = [attrName for attrName , attrValue
276
- in token [ "data" ] ]
297
+ in token . get ( "data" , []) ]
277
298
for attrName in requiredAttributeMap [name ]:
278
299
if attrName not in attrsPresent :
279
300
yield {"type" : "ParseError" ,
@@ -283,12 +304,39 @@ def checkStartTagRequiredAttributes(self, token):
283
304
284
305
def checkStartTagUnknownAttributes(self, token):
    """Yield an "unknown-attribute" ParseError per unrecognised attribute.

    An attribute is recognised when its lower-cased name is in
    globalAttributes or in the allowedAttributeMap entry for the
    lower-cased tag name.  The error carries the attribute name exactly as
    it appears in the token.
    """
    # Default to "" so a token without a "name" entry does not raise
    # AttributeError on None; the sibling checks use the same default.
    name = token.get("name", "").lower()
    allowedAttributes = globalAttributes | allowedAttributeMap.get(name, frozenset(()))
    for attrName, attrValue in token.get("data", []):
        if attrName.lower() not in allowedAttributes:
            yield {"type": "ParseError",
                   "data": "unknown-attribute",
                   "datavars": {"tagName": name,
                                "attributeName": attrName}}
294
315
316
def validateAttributeValueClass(self, token, tagName, attrName, attrValue):
    """Validate a class attribute value as a token list.

    Every error reported by checkTokenList is passed through, and each one
    is followed by an "invalid-class-attribute" ParseError for tagName.
    """
    problems = self.checkTokenList(tagName, attrName, attrValue) or []
    for problem in problems:
        yield problem
        # A fresh dict per error, so consumers never share one object.
        yield {"type": "ParseError",
               "data": "invalid-class-attribute",
               "datavars": {"tagName": tagName}}
322
+
323
def checkTokenList(self, tagName, attrName, attrValue):
    """Yield a ParseError for every repeated value in a token list.

    "Token" here means a space-separated token inside an attribute value,
    i.e. http://www.whatwg.org/specs/web-apps/current-work/#set-of
    and not a token produced by the HTML tokenizer.

    attrValue is split on the characters in spaceCharacters.  Each time a
    value that has already been seen occurs again, a
    "duplicate-value-in-token-list" error carrying tagName, attrName and
    the repeated value is yielded.
    """
    seen = set()  # set membership keeps long token lists linear
    currentValue = ''
    # The appended space flushes the final token through the same path.
    for c in attrValue + ' ':
        if c not in spaceCharacters:
            currentValue += c
            continue
        if not currentValue:
            # Run of consecutive separators: nothing to record.
            continue
        if currentValue in seen:
            yield {"type": "ParseError",
                   "data": "duplicate-value-in-token-list",
                   "datavars": {"tagName": tagName,
                                "attributeName": attrName,
                                "attributeValue": currentValue}}
        seen.add(currentValue)
        currentValue = ''
0 commit comments