Skip to content

Commit bc4ceca

Browse files
committed
Fix serializer to really prefer lowercase entities
1 parent ad96596 commit bc4ceca

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

html5lib/serializer/htmlserializer.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,10 @@
2727
is_ucs4 = len(u"\U0010FFFF") == 1
2828
for k, v in entities.items():
2929
#skip multi-character entities
30-
if ((is_ucs4 and len(v) > 1) or
30+
if ((is_ucs4 and len(v) > 1) or
3131
(not is_ucs4 and len(v) > 2)):
3232
continue
33-
if v != "&" and encode_entity_map.get(v) != k.lower():
34-
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
33+
if v != "&":
3534
if len(v) == 2:
3635
v = utils.surrogatePairToCodepoint(v)
3736
else:
@@ -40,7 +39,9 @@
4039
except:
4140
print v
4241
raise
43-
encode_entity_map[v] = k
42+
if not v in encode_entity_map or k.islower():
43+
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
44+
encode_entity_map[v] = k
4445

4546
def htmlentityreplace_errors(exc):
4647
if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):

0 commit comments

Comments
 (0)