Skip to content

Commit a92ad7e

Browse files
author
Victor Stinner
committed
Merged revisions 81471-81472 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r81471 | victor.stinner | 2010-05-22 15:37:56 +0200 (sam., 22 mai 2010) | 7 lines Issue #6268: More bugfixes about BOM, UTF-16 and UTF-32 * Fix seek() method of codecs.open(), don't write the BOM twice after seek(0) * Fix reset() method of codecs, UTF-16, UTF-32 and StreamWriter classes * test_codecs: use "w+" mode instead of "wt+". "t" mode is not supported by Solaris or Windows, but does it really exist? I found it in the issue. ........ r81472 | victor.stinner | 2010-05-22 15:44:25 +0200 (sam., 22 mai 2010) | 4 lines Fix my last commit (r81471) about codecs Remember: don't touch the code just before a commit ........
1 parent fff532b commit a92ad7e

File tree

5 files changed

+77
-21
lines changed

5 files changed

+77
-21
lines changed

Lib/codecs.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,11 @@ def reset(self):
374374
"""
375375
pass
376376

377+
def seek(self, offset, whence=0):
378+
self.stream.seek(offset, whence)
379+
if whence == 0 and offset == 0:
380+
self.reset()
381+
377382
def __getattr__(self, name,
378383
getattr=getattr):
379384

@@ -606,8 +611,8 @@ def seek(self, offset, whence=0):
606611
607612
Resets the codec buffers used for keeping state.
608613
"""
609-
self.reset()
610614
self.stream.seek(offset, whence)
615+
self.reset()
611616

612617
def __next__(self):
613618

@@ -700,8 +705,10 @@ def reset(self):
700705
self.writer.reset()
701706

702707
def seek(self, offset, whence=0):
703-
self.reader.seek(offset, whence)
704-
self.writer.seek(offset, whence)
708+
self.stream.seek(offset, whence)
709+
self.reader.reset()
710+
if whence == 0 and offset == 0:
711+
self.writer.reset()
705712

706713
def __getattr__(self, name,
707714
getattr=getattr):

Lib/encodings/utf_16.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -103,17 +103,23 @@ def setstate(self, state):
103103

104104
class StreamWriter(codecs.StreamWriter):
105105
def __init__(self, stream, errors='strict'):
106-
self.bom_written = False
107106
codecs.StreamWriter.__init__(self, stream, errors)
107+
self.encoder = None
108+
109+
def reset(self):
110+
codecs.StreamWriter.reset(self)
111+
self.encoder = None
108112

109113
def encode(self, input, errors='strict'):
110-
self.bom_written = True
111-
result = codecs.utf_16_encode(input, errors)
112-
if sys.byteorder == 'little':
113-
self.encode = codecs.utf_16_le_encode
114+
if self.encoder is None:
115+
result = codecs.utf_16_encode(input, errors)
116+
if sys.byteorder == 'little':
117+
self.encoder = codecs.utf_16_le_encode
118+
else:
119+
self.encoder = codecs.utf_16_be_encode
120+
return result
114121
else:
115-
self.encode = codecs.utf_16_be_encode
116-
return result
122+
return self.encoder(input, errors)
117123

118124
class StreamReader(codecs.StreamReader):
119125

Lib/encodings/utf_32.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -98,17 +98,23 @@ def setstate(self, state):
9898

9999
class StreamWriter(codecs.StreamWriter):
100100
def __init__(self, stream, errors='strict'):
101-
self.bom_written = False
101+
self.encoder = None
102102
codecs.StreamWriter.__init__(self, stream, errors)
103103

104+
def reset(self):
105+
codecs.StreamWriter.reset(self)
106+
self.encoder = None
107+
104108
def encode(self, input, errors='strict'):
105-
self.bom_written = True
106-
result = codecs.utf_32_encode(input, errors)
107-
if sys.byteorder == 'little':
108-
self.encode = codecs.utf_32_le_encode
109+
if self.encoder is None:
110+
result = codecs.utf_32_encode(input, errors)
111+
if sys.byteorder == 'little':
112+
self.encoder = codecs.utf_32_le_encode
113+
else:
114+
self.encoder = codecs.utf_32_be_encode
115+
return result
109116
else:
110-
self.encode = codecs.utf_32_be_encode
111-
return result
117+
return self.encoder(input, errors)
112118

113119
class StreamReader(codecs.StreamReader):
114120

Lib/test/test_codecs.py

+38-2
Original file line numberDiff line numberDiff line change
@@ -1604,15 +1604,51 @@ def test_seek0(self):
16041604
"utf-32-le",
16051605
"utf-32-be")
16061606
for encoding in tests:
1607-
with codecs.open('foo', 'w+', encoding=encoding) as f:
1608-
# Check if the BOM is written only once
1607+
# Check if the BOM is written only once
1608+
with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f:
16091609
f.write(data)
16101610
f.write(data)
16111611
f.seek(0)
16121612
self.assertEquals(f.read(), data * 2)
16131613
f.seek(0)
16141614
self.assertEquals(f.read(), data * 2)
16151615

1616+
# Check that the BOM is written after a seek(0)
1617+
with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f:
1618+
f.write(data[0])
1619+
self.assertNotEquals(f.tell(), 0)
1620+
f.seek(0)
1621+
f.write(data)
1622+
f.seek(0)
1623+
self.assertEquals(f.read(), data)
1624+
1625+
# (StreamWriter) Check that the BOM is written after a seek(0)
1626+
with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f:
1627+
f.writer.write(data[0])
1628+
self.assertNotEquals(f.writer.tell(), 0)
1629+
f.writer.seek(0)
1630+
f.writer.write(data)
1631+
f.seek(0)
1632+
self.assertEquals(f.read(), data)
1633+
1634+
# Check that the BOM is not written after a seek() at a position
1635+
# different than the start
1636+
with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f:
1637+
f.write(data)
1638+
f.seek(f.tell())
1639+
f.write(data)
1640+
f.seek(0)
1641+
self.assertEquals(f.read(), data * 2)
1642+
1643+
# (StreamWriter) Check that the BOM is not written after a seek()
1644+
# at a position different than the start
1645+
with codecs.open(support.TESTFN, 'w+', encoding=encoding) as f:
1646+
f.writer.write(data)
1647+
f.writer.seek(f.writer.tell())
1648+
f.writer.write(data)
1649+
f.seek(0)
1650+
self.assertEquals(f.read(), data * 2)
1651+
16161652

16171653
def test_main():
16181654
support.run_unittest(

Misc/NEWS

+3-2
Original file line numberDiff line numberDiff line change
@@ -393,8 +393,9 @@ C-API
393393
Library
394394
-------
395395

396-
- Issue #6268: Fix seek() method of codecs.open(), don't read the BOM twice
397-
after seek(0)
396+
- Issue #6268: Fix seek() method of codecs.open(), don't read or write the BOM
397+
twice after seek(0). Fix also reset() method of codecs, UTF-16, UTF-32 and
398+
StreamWriter classes.
398399

399400
- Issue #3798: sys.exit(message) writes the message to sys.stderr file, instead
400401
of the C file stderr, to use stderr encoding and error handler

0 commit comments

Comments
 (0)