Skip to content

Commit 85e729e

Browse files
committed
Take the first step in resolving the messy pkgutil vs importlib edge cases by basing pkgutil explicitly on importlib, deprecating its internal import emulation and setting __main__.__loader__ correctly so that runpy still works (Affects #15343, #15314, #15357)
1 parent f96cf91 commit 85e729e

File tree

7 files changed

+263
-145
lines changed

7 files changed

+263
-145
lines changed

Doc/library/pkgutil.rst

Lines changed: 37 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -56,21 +56,32 @@ support.
5656
Note that :class:`ImpImporter` does not currently support being used by
5757
placement on :data:`sys.meta_path`.
5858

59+
.. deprecated:: 3.3
60+
This emulation is no longer needed, as the standard import mechanism
61+
is now fully PEP 302 compliant and available in :mod:`importlib`.
62+
5963

6064
.. class:: ImpLoader(fullname, file, filename, etc)
6165

6266
:pep:`302` Loader that wraps Python's "classic" import algorithm.
6367

68+
.. deprecated:: 3.3
69+
This emulation is no longer needed, as the standard import mechanism
70+
is now fully PEP 302 compliant and available in :mod:`importlib`.
71+
6472

6573
.. function:: find_loader(fullname)
6674

67-
Find a :pep:`302` "loader" object for *fullname*.
75+
Retrieve a :pep:`302` module loader for the given *fullname*.
6876

69-
If *fullname* contains dots, path must be the containing package's
70-
``__path__``. Returns ``None`` if the module cannot be found or imported.
71-
This function uses :func:`iter_importers`, and is thus subject to the same
72-
limitations regarding platform-specific special import locations such as the
73-
Windows registry.
77+
This is a convenience wrapper around :func:`importlib.find_loader` that
78+
sets the *path* argument correctly when searching for submodules, and
79+
also ensures parent packages (if any) are imported before searching for
80+
submodules.
81+
82+
.. versionchanged:: 3.3
83+
Updated to be based directly on :mod:`importlib` rather than relying
84+
on a package internal PEP 302 import emulation.
7485

7586

7687
.. function:: get_importer(path_item)
@@ -80,13 +91,13 @@ support.
8091
The returned importer is cached in :data:`sys.path_importer_cache` if it was
8192
newly created by a path hook.
8293

83-
If there is no importer, a wrapper around the basic import machinery is
84-
returned. This wrapper is never inserted into the importer cache (``None``
85-
is inserted instead).
86-
8794
The cache (or part of it) can be cleared manually if a rescan of
8895
:data:`sys.path_hooks` is necessary.
8996

97+
.. versionchanged:: 3.3
98+
Updated to be based directly on :mod:`importlib` rather than relying
99+
on a package internal PEP 302 import emulation.
100+
90101

91102
.. function:: get_loader(module_or_name)
92103

@@ -102,31 +113,27 @@ support.
102113
limitations regarding platform-specific special import locations such as the
103114
Windows registry.
104115

116+
.. versionchanged:: 3.3
117+
Updated to be based directly on :mod:`importlib` rather than relying
118+
on a package internal PEP 302 import emulation.
119+
105120

106121
.. function:: iter_importers(fullname='')
107122

108123
Yield :pep:`302` importers for the given module name.
109124

110-
If fullname contains a '.', the importers will be for the package containing
111-
fullname, otherwise they will be importers for :data:`sys.meta_path`,
112-
:data:`sys.path`, and Python's "classic" import machinery, in that order. If
113-
the named module is in a package, that package is imported as a side effect
114-
of invoking this function.
115-
116-
Non-:pep:`302` mechanisms (e.g. the Windows registry) used by the standard
117-
import machinery to find files in alternative locations are partially
118-
supported, but are searched *after* :data:`sys.path`. Normally, these
119-
locations are searched *before* :data:`sys.path`, preventing :data:`sys.path`
120-
entries from shadowing them.
121-
122-
For this to cause a visible difference in behaviour, there must be a module
123-
or package name that is accessible via both :data:`sys.path` and one of the
124-
non-:pep:`302` file system mechanisms. In this case, the emulation will find
125-
the former version, while the builtin import mechanism will find the latter.
126-
127-
Items of the following types can be affected by this discrepancy:
128-
``imp.C_EXTENSION``, ``imp.PY_SOURCE``, ``imp.PY_COMPILED``,
129-
``imp.PKG_DIRECTORY``.
125+
If fullname contains a '.', the importers will be for the package
126+
containing fullname, otherwise they will be all registered top level
127+
importers (i.e. those on both sys.meta_path and sys.path_hooks).
128+
129+
If the named module is in a package, that package is imported as a side
130+
effect of invoking this function.
131+
132+
If no module name is specified, all top level importers are produced.
133+
134+
.. versionchanged:: 3.3
135+
Updated to be based directly on :mod:`importlib` rather than relying
136+
on a package internal PEP 302 import emulation.
130137

131138

132139
.. function:: iter_modules(path=None, prefix='')

Lib/pkgutil.py

Lines changed: 43 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import os
44
import sys
55
import imp
6+
import importlib
67
import os.path
8+
from warnings import warn
79
from types import ModuleType
810

911
__all__ = [
@@ -168,6 +170,8 @@ class ImpImporter:
168170
"""
169171

170172
def __init__(self, path=None):
173+
warn("This emulation is deprecated, use 'importlib' instead",
174+
DeprecationWarning)
171175
self.path = path
172176

173177
def find_module(self, fullname, path=None):
@@ -232,6 +236,8 @@ class ImpLoader:
232236
code = source = None
233237

234238
def __init__(self, fullname, file, filename, etc):
239+
warn("This emulation is deprecated, use 'importlib' instead",
240+
DeprecationWarning)
235241
self.file = file
236242
self.filename = filename
237243
self.fullname = fullname
@@ -366,10 +372,6 @@ def get_importer(path_item):
366372
The returned importer is cached in sys.path_importer_cache
367373
if it was newly created by a path hook.
368374
369-
If there is no importer, a wrapper around the basic import
370-
machinery is returned. This wrapper is never inserted into
371-
the importer cache (None is inserted instead).
372-
373375
The cache (or part of it) can be cleared manually if a
374376
rescan of sys.path_hooks is necessary.
375377
"""
@@ -384,66 +386,45 @@ def get_importer(path_item):
384386
except ImportError:
385387
pass
386388
else:
387-
try:
388-
importer = ImpImporter(path_item)
389-
except ImportError:
390-
importer = None
389+
importer = None
391390
return importer
392391

393392

394393
def iter_importers(fullname=""):
395394
"""Yield PEP 302 importers for the given module name
396395
397396
If fullname contains a '.', the importers will be for the package
398-
containing fullname, otherwise they will be importers for sys.meta_path,
399-
sys.path, and Python's "classic" import machinery, in that order. If
400-
the named module is in a package, that package is imported as a side
401-
effect of invoking this function.
397+
containing fullname, otherwise they will be all registered top level
398+
importers (i.e. those on both sys.meta_path and sys.path_hooks).
402399
403-
Non PEP 302 mechanisms (e.g. the Windows registry) used by the
404-
standard import machinery to find files in alternative locations
405-
are partially supported, but are searched AFTER sys.path. Normally,
406-
these locations are searched BEFORE sys.path, preventing sys.path
407-
entries from shadowing them.
408-
409-
For this to cause a visible difference in behaviour, there must
410-
be a module or package name that is accessible via both sys.path
411-
and one of the non PEP 302 file system mechanisms. In this case,
412-
the emulation will find the former version, while the builtin
413-
import mechanism will find the latter.
400+
If the named module is in a package, that package is imported as a side
401+
effect of invoking this function.
414402
415-
Items of the following types can be affected by this discrepancy:
416-
imp.C_EXTENSION, imp.PY_SOURCE, imp.PY_COMPILED, imp.PKG_DIRECTORY
403+
If no module name is specified, all top level importers are produced.
417404
"""
418405
if fullname.startswith('.'):
419-
raise ImportError("Relative module names not supported")
406+
msg = "Relative module name {!r} not supported".format(fullname)
407+
raise ImportError(msg)
420408
if '.' in fullname:
421409
# Get the containing package's __path__
422-
pkg = '.'.join(fullname.split('.')[:-1])
423-
if pkg not in sys.modules:
424-
__import__(pkg)
425-
path = getattr(sys.modules[pkg], '__path__', None) or []
410+
pkg_name = fullname.rpartition(".")[0]
411+
pkg = importlib.import_module(pkg)
412+
path = getattr(sys.modules[pkg], '__path__', None)
413+
if path is None:
414+
return
426415
else:
427416
for importer in sys.meta_path:
428417
yield importer
429418
path = sys.path
430419
for item in path:
431420
yield get_importer(item)
432-
if '.' not in fullname:
433-
yield ImpImporter()
434421

435422
def get_loader(module_or_name):
436423
"""Get a PEP 302 "loader" object for module_or_name
437424
438-
If the module or package is accessible via the normal import
439-
mechanism, a wrapper around the relevant part of that machinery
440-
is returned. Returns None if the module cannot be found or imported.
425+
Returns None if the module cannot be found or imported.
441426
If the named module is not already imported, its containing package
442427
(if any) is imported, in order to establish the package __path__.
443-
444-
This function uses iter_importers(), and is thus subject to the same
445-
limitations regarding platform-specific special import locations such
446-
as the Windows registry.
447428
"""
448429
if module_or_name in sys.modules:
449430
module_or_name = sys.modules[module_or_name]
@@ -457,22 +438,33 @@ def get_loader(module_or_name):
457438
fullname = module_or_name
458439
return find_loader(fullname)
459440

441+
460442
def find_loader(fullname):
461443
"""Find a PEP 302 "loader" object for fullname
462444
463-
If fullname contains dots, path must be the containing package's __path__.
464-
Returns None if the module cannot be found or imported. This function uses
465-
iter_importers(), and is thus subject to the same limitations regarding
466-
platform-specific special import locations such as the Windows registry.
445+
This is a convenience wrapper around :func:`importlib.find_loader` that
446+
sets the *path* argument correctly when searching for submodules, and
447+
also ensures parent packages (if any) are imported before searching for
448+
submodules.
467449
"""
468-
for importer in iter_importers(fullname):
469-
if importer is None:
470-
continue
471-
loader = importer.find_module(fullname)
472-
if loader is not None:
473-
return loader
474-
475-
return None
450+
if fullname.startswith('.'):
451+
msg = "Relative module name {!r} not supported".format(fullname)
452+
raise ImportError(msg)
453+
path = None
454+
pkg_name = fullname.rpartition(".")[0]
455+
if pkg_name:
456+
pkg = importlib.import_module(pkg_name)
457+
path = getattr(pkg, "__path__", None)
458+
if path is None:
459+
return None
460+
try:
461+
return importlib.find_loader(fullname, path)
462+
except (ImportError, AttributeError, TypeError, ValueError) as ex:
463+
# This hack fixes an impedance mismatch between pkgutil and
464+
# importlib, where the latter throws other errors for cases where
465+
# pkgutil previously threw ImportError
466+
msg = "Error while finding loader for {!r} ({}: {})"
467+
raise ImportError(msg.format(fullname, type(ex), ex)) from ex
476468

477469

478470
def extend_path(path, name):

Lib/runpy.py

Lines changed: 19 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,8 @@
1313
import os
1414
import sys
1515
import imp
16-
from pkgutil import read_code
17-
try:
18-
from imp import get_loader
19-
except ImportError:
20-
from pkgutil import get_loader
16+
import importlib.machinery
17+
from pkgutil import read_code, get_loader, get_importer
2118

2219
__all__ = [
2320
"run_module", "run_path",
@@ -154,6 +151,7 @@ def _run_module_as_main(mod_name, alter_argv=True):
154151
# know what the code was looking for
155152
info = "can't find '__main__' module in %r" % sys.argv[0]
156153
msg = "%s: %s" % (sys.executable, info)
154+
raise
157155
sys.exit(msg)
158156
pkg_name = mod_name.rpartition('.')[0]
159157
main_globals = sys.modules["__main__"].__dict__
@@ -183,44 +181,34 @@ def run_module(mod_name, init_globals=None,
183181
def _get_main_module_details():
184182
# Helper that gives a nicer error message when attempting to
185183
# execute a zipfile or directory by invoking __main__.py
184+
# Also moves the standard __main__ out of the way so that the
185+
# preexisting __loader__ entry doesn't cause issues
186186
main_name = "__main__"
187+
saved_main = sys.modules[main_name]
188+
del sys.modules[main_name]
187189
try:
188190
return _get_module_details(main_name)
189191
except ImportError as exc:
190192
if main_name in str(exc):
191193
raise ImportError("can't find %r module in %r" %
192-
(main_name, sys.path[0]))
194+
(main_name, sys.path[0])) from exc
193195
raise
196+
finally:
197+
sys.modules[main_name] = saved_main
194198

195199

196-
# XXX (ncoghlan): Perhaps expose the C API function
197-
# as imp.get_importer instead of reimplementing it in Python?
198-
def _get_importer(path_name):
199-
"""Python version of PyImport_GetImporter C API function"""
200-
cache = sys.path_importer_cache
201-
try:
202-
importer = cache[path_name]
203-
except KeyError:
204-
for hook in sys.path_hooks:
205-
try:
206-
importer = hook(path_name)
207-
break
208-
except ImportError:
209-
pass
210-
else:
211-
importer = None
212-
cache[path_name] = importer
213-
return importer
214-
215-
def _get_code_from_file(fname):
200+
def _get_code_from_file(run_name, fname):
216201
# Check for a compiled file first
217202
with open(fname, "rb") as f:
218203
code = read_code(f)
219204
if code is None:
220205
# That didn't work, so try it as normal source code
221206
with open(fname, "rb") as f:
222207
code = compile(f.read(), fname, 'exec')
223-
return code
208+
loader = importlib.machinery.SourceFileLoader(run_name, fname)
209+
else:
210+
loader = importlib.machinery.SourcelessFileLoader(run_name, fname)
211+
return code, loader
224212

225213
def run_path(path_name, init_globals=None, run_name=None):
226214
"""Execute code located at the specified filesystem location
@@ -235,13 +223,13 @@ def run_path(path_name, init_globals=None, run_name=None):
235223
if run_name is None:
236224
run_name = "<run_path>"
237225
pkg_name = run_name.rpartition(".")[0]
238-
importer = _get_importer(path_name)
226+
importer = get_importer(path_name)
239227
if isinstance(importer, (type(None), imp.NullImporter)):
240228
# Not a valid sys.path entry, so run the code directly
241229
# execfile() doesn't help as we want to allow compiled files
242-
code = _get_code_from_file(path_name)
230+
code, mod_loader = _get_code_from_file(run_name, path_name)
243231
return _run_module_code(code, init_globals, run_name, path_name,
244-
pkg_name=pkg_name)
232+
mod_loader, pkg_name)
245233
else:
246234
# Importer is defined for path, so add it to
247235
# the start of sys.path
@@ -253,13 +241,7 @@ def run_path(path_name, init_globals=None, run_name=None):
253241
# have no choice and we have to remove it even while we read the
254242
# code. If we don't do this, a __loader__ attribute in the
255243
# existing __main__ module may prevent location of the new module.
256-
main_name = "__main__"
257-
saved_main = sys.modules[main_name]
258-
del sys.modules[main_name]
259-
try:
260-
mod_name, loader, code, fname = _get_main_module_details()
261-
finally:
262-
sys.modules[main_name] = saved_main
244+
mod_name, loader, code, fname = _get_main_module_details()
263245
with _TempModule(run_name) as temp_module, \
264246
_ModifiedArgv0(path_name):
265247
mod_globals = temp_module.module.__dict__

0 commit comments

Comments
 (0)