Skip to content

Commit e7fc50f

Add an errors parameter to open() and TextIOWrapper() to specify error handling.
1 parent c6fe37b, commit e7fc50f

File tree

7 files changed: 77 additions and 17 deletions.

Include/fileobject.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ extern "C" {
99
#define PY_STDIOTEXTMODE "b"
1010

1111
PyAPI_FUNC(PyObject *) PyFile_FromFd(int, char *, char *, int, char *, char *,
12-
int);
12+
char *, int);
1313
PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int);
1414
PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int);
1515
PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *);

Lib/io.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ def __init__(self, errno, strerror, characters_written=0):
4949
self.characters_written = characters_written
5050

5151

52-
def open(file, mode="r", buffering=None, encoding=None, newline=None,
53-
closefd=True):
52+
def open(file, mode="r", buffering=None, encoding=None, errors=None,
53+
newline=None, closefd=True):
5454
r"""Replacement for the built-in open function.
5555
5656
Args:
@@ -61,6 +61,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
6161
can be: 0 = unbuffered, 1 = line buffered,
6262
larger = fully buffered.
6363
encoding: optional string giving the text encoding.
64+
errors: optional string giving the encoding error handling.
6465
newline: optional newlines specifier; must be None, '', '\n', '\r'
6566
or '\r\n'; all other values are illegal. It controls the
6667
handling of line endings. It works as follows:
@@ -99,7 +100,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
99100
'U': universal newline mode (for backwards compatibility)
100101
101102
Constraints:
102-
- encoding must not be given when a binary mode is given
103+
- encoding or errors must not be given when a binary mode is given
103104
- buffering must not be zero when a text mode is given
104105
105106
Returns:
@@ -115,6 +116,8 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
115116
raise TypeError("invalid buffering: %r" % buffering)
116117
if encoding is not None and not isinstance(encoding, str):
117118
raise TypeError("invalid encoding: %r" % encoding)
119+
if errors is not None and not isinstance(errors, str):
120+
raise TypeError("invalid errors: %r" % errors)
118121
modes = set(mode)
119122
if modes - set("arwb+tU") or len(mode) > len(modes):
120123
raise ValueError("invalid mode: %r" % mode)
@@ -136,6 +139,8 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
136139
raise ValueError("must have exactly one of read/write/append mode")
137140
if binary and encoding is not None:
138141
raise ValueError("binary mode doesn't take an encoding argument")
142+
if binary and errors is not None:
143+
raise ValueError("binary mode doesn't take an errors argument")
139144
if binary and newline is not None:
140145
raise ValueError("binary mode doesn't take a newline argument")
141146
raw = FileIO(file,
@@ -177,7 +182,7 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None,
177182
buffer.name = file
178183
buffer.mode = mode
179184
return buffer
180-
text = TextIOWrapper(buffer, encoding, newline)
185+
text = TextIOWrapper(buffer, encoding, errors, newline)
181186
text.name = file
182187
text.mode = mode
183188
return text
@@ -1128,7 +1133,7 @@ class TextIOWrapper(TextIOBase):
11281133

11291134
_CHUNK_SIZE = 128
11301135

1131-
def __init__(self, buffer, encoding=None, newline=None):
1136+
def __init__(self, buffer, encoding=None, errors=None, newline=None):
11321137
if newline not in (None, "", "\n", "\r", "\r\n"):
11331138
raise ValueError("illegal newline value: %r" % (newline,))
11341139
if encoding is None:
@@ -1148,8 +1153,15 @@ def __init__(self, buffer, encoding=None, newline=None):
11481153
if not isinstance(encoding, str):
11491154
raise ValueError("invalid encoding: %r" % encoding)
11501155

1156+
if errors is None:
1157+
errors = "strict"
1158+
else:
1159+
if not isinstance(errors, str):
1160+
raise ValueError("invalid errors: %r" % errors)
1161+
11511162
self.buffer = buffer
11521163
self._encoding = encoding
1164+
self._errors = errors
11531165
self._readuniversal = not newline
11541166
self._readtranslate = newline is None
11551167
self._readnl = newline
@@ -1164,6 +1176,10 @@ def __init__(self, buffer, encoding=None, newline=None):
11641176
def encoding(self):
11651177
return self._encoding
11661178

1179+
@property
1180+
def errors(self):
1181+
return self._errors
1182+
11671183
# A word about _snapshot. This attribute is either None, or a
11681184
# tuple (decoder_state, readahead, pending) where decoder_state is
11691185
# the second (integer) item of the decoder state, readahead is the
@@ -1206,7 +1222,7 @@ def write(self, s: str):
12061222
if haslf and self._writetranslate and self._writenl != "\n":
12071223
s = s.replace("\n", self._writenl)
12081224
# XXX What if we were just reading?
1209-
b = s.encode(self._encoding)
1225+
b = s.encode(self._encoding, self._errors)
12101226
self.buffer.write(b)
12111227
if haslf and self.isatty():
12121228
self.flush()
@@ -1220,7 +1236,7 @@ def _get_decoder(self):
12201236
if make_decoder is None:
12211237
raise IOError("Can't find an incremental decoder for encoding %s" %
12221238
self._encoding)
1223-
decoder = make_decoder() # XXX: errors
1239+
decoder = make_decoder(self._errors)
12241240
if self._readuniversal:
12251241
decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
12261242
self._decoder = decoder
@@ -1447,9 +1463,11 @@ class StringIO(TextIOWrapper):
14471463

14481464
# XXX This is really slow, but fully functional
14491465

1450-
def __init__(self, initial_value="", encoding="utf-8", newline="\n"):
1466+
def __init__(self, initial_value="", encoding="utf-8",
1467+
errors="strict", newline="\n"):
14511468
super(StringIO, self).__init__(BytesIO(),
14521469
encoding=encoding,
1470+
errors=errors,
14531471
newline=newline)
14541472
if initial_value:
14551473
if not isinstance(initial_value, str):
@@ -1459,4 +1477,4 @@ def __init__(self, initial_value="", encoding="utf-8", newline="\n"):
14591477

14601478
def getvalue(self):
14611479
self.flush()
1462-
return self.buffer.getvalue().decode(self._encoding)
1480+
return self.buffer.getvalue().decode(self._encoding, self._errors)

Lib/test/test_io.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,46 @@ def setUp(self):
496496
def tearDown(self):
497497
test_support.unlink(test_support.TESTFN)
498498

499+
def testEncodingErrorsReading(self):
500+
# (1) default
501+
b = io.BytesIO(b"abc\n\xff\n")
502+
t = io.TextIOWrapper(b, encoding="ascii")
503+
self.assertRaises(UnicodeError, t.read)
504+
# (2) explicit strict
505+
b = io.BytesIO(b"abc\n\xff\n")
506+
t = io.TextIOWrapper(b, encoding="ascii", errors="strict")
507+
self.assertRaises(UnicodeError, t.read)
508+
# (3) ignore
509+
b = io.BytesIO(b"abc\n\xff\n")
510+
t = io.TextIOWrapper(b, encoding="ascii", errors="ignore")
511+
self.assertEquals(t.read(), "abc\n\n")
512+
# (4) replace
513+
b = io.BytesIO(b"abc\n\xff\n")
514+
t = io.TextIOWrapper(b, encoding="ascii", errors="replace")
515+
self.assertEquals(t.read(), "abc\n\ufffd\n")
516+
517+
def testEncodingErrorsWriting(self):
518+
# (1) default
519+
b = io.BytesIO()
520+
t = io.TextIOWrapper(b, encoding="ascii")
521+
self.assertRaises(UnicodeError, t.write, "\xff")
522+
# (2) explicit strict
523+
b = io.BytesIO()
524+
t = io.TextIOWrapper(b, encoding="ascii", errors="strict")
525+
self.assertRaises(UnicodeError, t.write, "\xff")
526+
# (3) ignore
527+
b = io.BytesIO()
528+
t = io.TextIOWrapper(b, encoding="ascii", errors="ignore")
529+
t.write("abc\xffdef\n")
530+
t.flush()
531+
self.assertEquals(b.getvalue(), b"abcdef\n")
532+
# (4) replace
533+
b = io.BytesIO()
534+
t = io.TextIOWrapper(b, encoding="ascii", errors="replace")
535+
t.write("abc\xffdef\n")
536+
t.flush()
537+
self.assertEquals(b.getvalue(), b"abc?def\n")
538+
499539
def testNewlinesInput(self):
500540
testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
501541
normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n")

Objects/complexobject.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,7 @@ complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
915915
return NULL;
916916
}
917917
cr.real = PyFloat_AsDouble(tmp);
918+
cr.imag = 0.0; /* Shut up compiler warning */
918919
Py_DECREF(tmp);
919920
}
920921
if (i == NULL) {

Objects/fileobject.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,16 @@ extern "C" {
2727

2828
PyObject *
2929
PyFile_FromFd(int fd, char *name, char *mode, int buffering, char *encoding,
30-
char *newline, int closefd)
30+
char *errors, char *newline, int closefd)
3131
{
3232
PyObject *io, *stream, *nameobj = NULL;
3333

3434
io = PyImport_ImportModule("io");
3535
if (io == NULL)
3636
return NULL;
37-
stream = PyObject_CallMethod(io, "open", "isissi", fd, mode,
38-
buffering, encoding, newline, closefd);
37+
stream = PyObject_CallMethod(io, "open", "isisssi", fd, mode,
38+
buffering, encoding, errors,
39+
newline, closefd);
3940
Py_DECREF(io);
4041
if (stream == NULL)
4142
return NULL;

Python/import.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2602,7 +2602,7 @@ call_find_module(char *name, PyObject *path)
26022602
(char*)PyUnicode_GetDefaultEncoding();
26032603
}
26042604
fob = PyFile_FromFd(fd, pathname, fdp->mode, -1,
2605-
(char*)encoding, NULL, 1);
2605+
(char*)encoding, NULL, NULL, 1);
26062606
if (fob == NULL) {
26072607
close(fd);
26082608
PyMem_FREE(found_encoding);

Python/pythonrun.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -770,7 +770,7 @@ initstdio(void)
770770
#endif
771771
}
772772
else {
773-
if (!(std = PyFile_FromFd(fd, "<stdin>", "r", -1, NULL,
773+
if (!(std = PyFile_FromFd(fd, "<stdin>", "r", -1, NULL, NULL,
774774
"\n", 0))) {
775775
goto error;
776776
}
@@ -790,7 +790,7 @@ initstdio(void)
790790
#endif
791791
}
792792
else {
793-
if (!(std = PyFile_FromFd(fd, "<stdout>", "w", -1, NULL,
793+
if (!(std = PyFile_FromFd(fd, "<stdout>", "w", -1, NULL, NULL,
794794
"\n", 0))) {
795795
goto error;
796796
}
@@ -811,7 +811,7 @@ initstdio(void)
811811
#endif
812812
}
813813
else {
814-
if (!(std = PyFile_FromFd(fd, "<stderr>", "w", -1, NULL,
814+
if (!(std = PyFile_FromFd(fd, "<stderr>", "w", -1, NULL, NULL,
815815
"\n", 0))) {
816816
goto error;
817817
}

0 commit comments