Skip to content

Commit 27b1ca2

Browse files
committed
Issue #16416: On Mac OS X, operating system data are now always
encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding (which may be ASCII if no locale environment variable is set), to avoid inconsistencies with os.fsencode() and os.fsdecode() functions which are already using UTF-8/surrogateescape.
1 parent ce31f66 commit 27b1ca2

File tree

4 files changed

+65
-18
lines changed

4 files changed

+65
-18
lines changed

Misc/NEWS

+6
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ What's New in Python 3.2.4
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #16416: On Mac OS X, operating system data are now always
14+
encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding
15+
(which may be ASCII if no locale environment variable is set), to avoid
16+
inconsistencies with os.fsencode() and os.fsdecode() functions which are
17+
already using UTF-8/surrogateescape.
18+
1319
- Issue #16588: Silence unused-but-set warnings in Python/thread_pthread.h
1420

1521
- Issue #16306: Fix multiple error messages when unknown command line

Modules/python.c

-8
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@ wmain(int argc, wchar_t **argv)
1515
}
1616
#else
1717

18-
#ifdef __APPLE__
19-
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
20-
#endif
21-
2218
int
2319
main(int argc, char **argv)
2420
{
@@ -45,11 +41,7 @@ main(int argc, char **argv)
4541
oldloc = strdup(setlocale(LC_ALL, NULL));
4642
setlocale(LC_ALL, "");
4743
for (i = 0; i < argc; i++) {
48-
#ifdef __APPLE__
49-
argv_copy[i] = _Py_DecodeUTF8_surrogateescape(argv[i], strlen(argv[i]));
50-
#else
5144
argv_copy[i] = _Py_char2wchar(argv[i], NULL);
52-
#endif
5345
if (!argv_copy[i]) {
5446
fprintf(stderr, "Fatal Python error: "
5547
"unable to decode the command line argument #%i\n",

Objects/unicodeobject.c

+5-4
Original file line numberDiff line numberDiff line change
@@ -2792,7 +2792,10 @@ PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
27922792
#ifdef __APPLE__
27932793

27942794
/* Simplified UTF-8 decoder using surrogateescape error handler,
2795-
used to decode the command line arguments on Mac OS X. */
2795+
used to decode the command line arguments on Mac OS X.
2796+
2797+
Return a pointer to a newly allocated wide character string (use
2798+
PyMem_Free() to free the memory), or NULL on memory allocation error. */
27962799

27972800
wchar_t*
27982801
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
@@ -2803,10 +2806,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
28032806

28042807
/* Note: size is always at least as large as the resulting Unicode
28052808
character count */
2806-
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) {
2807-
PyErr_NoMemory();
2809+
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1))
28082810
return NULL;
2809-
}
28102811
unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
28112812
if (!unicode)
28122813
return NULL;

Python/fileutils.c

+54-6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
# include <windows.h>
44
#endif
55

6+
#ifdef __APPLE__
7+
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
8+
#endif
9+
610
#ifdef HAVE_STAT
711

812
/* Decode a byte string from the locale encoding with the
@@ -23,6 +27,17 @@
2327
wchar_t*
2428
_Py_char2wchar(const char* arg, size_t *size)
2529
{
30+
#ifdef __APPLE__
31+
wchar_t *wstr;
32+
wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
33+
if (size != NULL) {
34+
if (wstr != NULL)
35+
*size = wcslen(wstr);
36+
else
37+
*size = (size_t)-1;
38+
}
39+
return wstr;
40+
#else
2641
wchar_t *res;
2742
#ifdef HAVE_BROKEN_MBSTOWCS
2843
/* Some platforms have a broken implementation of
@@ -107,7 +122,7 @@ _Py_char2wchar(const char* arg, size_t *size)
107122
argsize -= converted;
108123
out++;
109124
}
110-
#else
125+
#else /* HAVE_MBRTOWC */
111126
/* Cannot use C locale for escaping; manually escape as if charset
112127
is ASCII (i.e. escape all bytes > 127). This will still roundtrip
113128
correctly in the locale's charset, which must be an ASCII superset. */
@@ -121,13 +136,14 @@ _Py_char2wchar(const char* arg, size_t *size)
121136
else
122137
*out++ = 0xdc00 + *in++;
123138
*out = 0;
124-
#endif
139+
#endif /* HAVE_MBRTOWC */
125140
if (size != NULL)
126141
*size = out - res;
127142
return res;
128143
oom:
129144
fprintf(stderr, "out of memory\n");
130145
return NULL;
146+
#endif /* __APPLE__ */
131147
}
132148

133149
/* Encode a (wide) character string to the locale encoding with the
@@ -144,14 +160,42 @@ _Py_char2wchar(const char* arg, size_t *size)
144160
char*
145161
_Py_wchar2char(const wchar_t *text, size_t *error_pos)
146162
{
163+
#ifdef __APPLE__
164+
Py_ssize_t len;
165+
PyObject *unicode, *bytes = NULL;
166+
char *cpath;
167+
168+
unicode = PyUnicode_FromWideChar(text, wcslen(text));
169+
if (unicode == NULL)
170+
return NULL;
171+
172+
bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
173+
Py_DECREF(unicode);
174+
if (bytes == NULL) {
175+
PyErr_Clear();
176+
if (error_pos != NULL)
177+
*error_pos = (size_t)-1;
178+
return NULL;
179+
}
180+
181+
len = PyBytes_GET_SIZE(bytes);
182+
cpath = PyMem_Malloc(len+1);
183+
if (cpath == NULL) {
184+
PyErr_Clear();
185+
Py_DECREF(bytes);
186+
if (error_pos != NULL)
187+
*error_pos = (size_t)-1;
188+
return NULL;
189+
}
190+
memcpy(cpath, PyBytes_AsString(bytes), len + 1);
191+
Py_DECREF(bytes);
192+
return cpath;
193+
#else /* __APPLE__ */
147194
const size_t len = wcslen(text);
148195
char *result = NULL, *bytes = NULL;
149196
size_t i, size, converted;
150197
wchar_t c, buf[2];
151198

152-
if (error_pos != NULL)
153-
*error_pos = (size_t)-1;
154-
155199
/* The function works in two steps:
156200
1. compute the length of the output buffer in bytes (size)
157201
2. outputs the bytes */
@@ -198,11 +242,15 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos)
198242

199243
size += 1; /* nul byte at the end */
200244
result = PyMem_Malloc(size);
201-
if (result == NULL)
245+
if (result == NULL) {
246+
if (error_pos != NULL)
247+
*error_pos = (size_t)-1;
202248
return NULL;
249+
}
203250
bytes = result;
204251
}
205252
return result;
253+
#endif /* __APPLE__ */
206254
}
207255

208256
/* In principle, this should use HAVE__WSTAT, and _wstat

0 commit comments

Comments
 (0)