Skip to content

Commit fd4722c

Browse files
committed
Issue #9548: Add a minimal "_bootlocale" module that is imported by the _io module instead of the full locale module.
1 parent 73abc52 commit fd4722c

File tree

6 files changed

+49
-22
lines changed

6 files changed

+49
-22
lines changed

Lib/_bootlocale.py

+34
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
"""A minimal subset of the locale module used at interpreter startup
(imported by the _io module), in order to reduce startup time.

Don't import directly from third-party code; use the `locale` module instead!
"""

import sys
import _locale

if sys.platform.startswith("win"):
    def getpreferredencoding(do_setlocale=True):
        """Return the encoding reported by _locale._getdefaultlocale().

        *do_setlocale* is accepted only so the signature matches the
        other platform variants; it is not used here.
        """
        return _locale._getdefaultlocale()[1]
else:
    try:
        # Feature probe: is nl_langinfo(CODESET) usable on this platform?
        _locale.CODESET
    except AttributeError:
        # A missing module attribute raises AttributeError (not
        # ImportError), so this is the exception that must be caught for
        # the legacy fallback below to ever be defined.
        def getpreferredencoding(do_setlocale=True):
            """Return the preferred encoding via the full locale module.

            *do_setlocale* is forwarded unchanged to
            locale.getpreferredencoding().
            """
            # This path for legacy systems needs the more complex
            # getdefaultlocale() function, import the full locale module.
            import locale
            return locale.getpreferredencoding(do_setlocale)
    else:
        def getpreferredencoding(do_setlocale=True):
            """Return the result of nl_langinfo(CODESET).

            This minimal variant never calls setlocale(), so callers
            must pass a false *do_setlocale*; callers that need the
            setlocale() dance should use locale.getpreferredencoding().
            """
            # Internal contract check, kept as an assert to preserve the
            # original behaviour of this private module.
            assert not do_setlocale
            result = _locale.nl_langinfo(_locale.CODESET)
            if not result and sys.platform == 'darwin':
                # nl_langinfo can return an empty string
                # when the setting has an invalid value.
                # Default to UTF-8 in that case because
                # UTF-8 is the default charset on OSX and
                # returning nothing will crash the
                # interpreter.
                result = 'UTF-8'
            return result

Lib/locale.py

+5-16
Original file line number | Diff line number | Diff line change
@@ -554,8 +554,8 @@ def resetlocale(category=LC_ALL):
554554
# On Win32, this will return the ANSI code page
555555
def getpreferredencoding(do_setlocale = True):
556556
"""Return the charset that the user is likely using."""
557-
import _locale
558-
return _locale._getdefaultlocale()[1]
557+
import _bootlocale
558+
return _bootlocale.getpreferredencoding(False)
559559
else:
560560
# On Unix, if CODESET is available, use that.
561561
try:
@@ -574,27 +574,16 @@ def getpreferredencoding(do_setlocale = True):
574574
def getpreferredencoding(do_setlocale = True):
575575
"""Return the charset that the user is likely using,
576576
according to the system configuration."""
577+
import _bootlocale
577578
if do_setlocale:
578579
oldloc = setlocale(LC_CTYPE)
579580
try:
580581
setlocale(LC_CTYPE, "")
581582
except Error:
582583
pass
583-
result = nl_langinfo(CODESET)
584-
if not result and sys.platform == 'darwin':
585-
# nl_langinfo can return an empty string
586-
# when the setting has an invalid value.
587-
# Default to UTF-8 in that case because
588-
# UTF-8 is the default charset on OSX and
589-
# returning nothing will crash the
590-
# interpreter.
591-
result = 'UTF-8'
584+
result = _bootlocale.getpreferredencoding(False)
585+
if do_setlocale:
592586
setlocale(LC_CTYPE, oldloc)
593-
else:
594-
result = nl_langinfo(CODESET)
595-
if not result and sys.platform == 'darwin':
596-
# See above for explanation
597-
result = 'UTF-8'
598587
return result
599588

600589

Lib/site.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -426,8 +426,8 @@ def aliasmbcs():
426426
while they are always available as "mbcs" in each locale. Make
427427
them usable by aliasing to "mbcs" in such a case."""
428428
if sys.platform == 'win32':
429-
import locale, codecs
430-
enc = locale.getdefaultlocale()[1]
429+
import _bootlocale, codecs
430+
enc = _bootlocale.getpreferredencoding(False)
431431
if enc.startswith('cp'): # "cp***" ?
432432
try:
433433
codecs.lookup(enc)

Lib/test/test_subprocess.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -883,8 +883,9 @@ def test_universal_newlines_communicate_encodings(self):
883883
#
884884
# UTF-16 and UTF-32-BE are sufficient to check both with BOM and
885885
# without, and UTF-16 and UTF-32.
886+
import _bootlocale
886887
for encoding in ['utf-16', 'utf-32-be']:
887-
old_getpreferredencoding = locale.getpreferredencoding
888+
old_getpreferredencoding = _bootlocale.getpreferredencoding
888889
# Indirectly via io.TextIOWrapper, Popen() defaults to
889890
# locale.getpreferredencoding(False) and earlier in Python 3.2 to
890891
# locale.getpreferredencoding().
@@ -895,7 +896,7 @@ def getpreferredencoding(do_setlocale=True):
895896
encoding)
896897
args = [sys.executable, '-c', code]
897898
try:
898-
locale.getpreferredencoding = getpreferredencoding
899+
_bootlocale.getpreferredencoding = getpreferredencoding
899900
# We set stdin to be non-None because, as of this writing,
900901
# a different code path is used when the number of pipes is
901902
# zero or one.
@@ -904,7 +905,7 @@ def getpreferredencoding(do_setlocale=True):
904905
stdout=subprocess.PIPE)
905906
stdout, stderr = popen.communicate(input='')
906907
finally:
907-
locale.getpreferredencoding = old_getpreferredencoding
908+
_bootlocale.getpreferredencoding = old_getpreferredencoding
908909
self.assertEqual(stdout, '1\n2\n3\n4')
909910

910911
def test_no_leaking(self):

Misc/NEWS

+3
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,9 @@ Core and Builtins
3636
Library
3737
-------
3838

39+
- Issue #9548: Add a minimal "_bootlocale" module that is imported by the
40+
_io module instead of the full locale module.
41+
3942
- Issue #18764: remove the 'print' alias for the PDB 'p' command so that it no
4043
longer shadows the print function.
4144

Modules/_io/_iomodule.c

+1-1
Original file line number | Diff line number | Diff line change
@@ -546,7 +546,7 @@ _PyIO_get_locale_module(_PyIO_State *state)
546546
}
547547
Py_CLEAR(state->locale_module);
548548
}
549-
mod = PyImport_ImportModule("locale");
549+
mod = PyImport_ImportModule("_bootlocale");
550550
if (mod == NULL)
551551
return NULL;
552552
state->locale_module = PyWeakref_NewRef(mod, NULL);

0 commit comments

Comments
 (0)