Skip to content

Commit 5136ac0

Browse files
committed
Issue #13645: pyc files now contain the size of the corresponding source
code, to avoid timestamp collisions (especially on filesystems with a low timestamp resolution) when checking for freshness of the bytecode.
1 parent 1f918c1 commit 5136ac0

File tree

14 files changed

+166
-48
lines changed

14 files changed

+166
-48
lines changed

Doc/library/importlib.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,11 +239,30 @@ are also provided to help in implementing the core ABCs.
239239
optimization to speed up loading by removing the parsing step of Python's
240240
compiler, and so no bytecode-specific API is exposed.
241241

242+
.. method:: path_stats(self, path)
243+
244+
Optional abstract method which returns a :class:`dict` containing
245+
metadata about the specifed path. Supported dictionary keys are:
246+
247+
- ``'mtime'`` (mandatory): an integer or floating-point number
248+
representing the modification time of the source code;
249+
- ``'size'`` (optional): the size in bytes of the source code.
250+
251+
Any other keys in the dictionary are ignored, to allow for future
252+
extensions.
253+
254+
.. versionadded:: 3.3
255+
242256
.. method:: path_mtime(self, path)
243257

244258
Optional abstract method which returns the modification time for the
245259
specified path.
246260

261+
.. deprecated:: 3.3
262+
This method is deprecated in favour of :meth:`path_stats`. You don't
263+
have to implement it, but it is still available for compatibility
264+
purposes.
265+
247266
.. method:: set_data(self, path, data)
248267

249268
Optional abstract method which writes the specified bytes to a file

Lib/importlib/_bootstrap.py

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -331,25 +331,40 @@ def is_package(self, fullname):
331331
filename = self.get_filename(fullname).rpartition(path_sep)[2]
332332
return filename.rsplit('.', 1)[0] == '__init__'
333333

334-
def _bytes_from_bytecode(self, fullname, data, source_mtime):
334+
def _bytes_from_bytecode(self, fullname, data, source_stats):
335335
"""Return the marshalled bytes from bytecode, verifying the magic
336-
number and timestamp along the way.
336+
number, timestamp and source size along the way.
337337
338-
If source_mtime is None then skip the timestamp check.
338+
If source_stats is None then skip the timestamp check.
339339
340340
"""
341341
magic = data[:4]
342342
raw_timestamp = data[4:8]
343+
raw_size = data[8:12]
343344
if len(magic) != 4 or magic != imp.get_magic():
344345
raise ImportError("bad magic number in {}".format(fullname))
345346
elif len(raw_timestamp) != 4:
346347
raise EOFError("bad timestamp in {}".format(fullname))
347-
elif source_mtime is not None:
348-
if marshal._r_long(raw_timestamp) != source_mtime:
349-
raise ImportError("bytecode is stale for {}".format(fullname))
348+
elif len(raw_size) != 4:
349+
raise EOFError("bad size in {}".format(fullname))
350+
if source_stats is not None:
351+
try:
352+
source_mtime = int(source_stats['mtime'])
353+
except KeyError:
354+
pass
355+
else:
356+
if marshal._r_long(raw_timestamp) != source_mtime:
357+
raise ImportError("bytecode is stale for {}".format(fullname))
358+
try:
359+
source_size = source_stats['size'] & 0xFFFFFFFF
360+
except KeyError:
361+
pass
362+
else:
363+
if marshal._r_long(raw_size) != source_size:
364+
raise ImportError("bytecode is stale for {}".format(fullname))
350365
# Can't return the code object as errors from marshal loading need to
351366
# propagate even when source is available.
352-
return data[8:]
367+
return data[12:]
353368

354369
@module_for_loader
355370
def _load_module(self, module, *, sourceless=False):
@@ -377,11 +392,20 @@ class SourceLoader(_LoaderBasics):
377392
def path_mtime(self, path):
378393
"""Optional method that returns the modification time (an int) for the
379394
specified path, where path is a str.
395+
"""
396+
raise NotImplementedError
380397

381-
Implementing this method allows the loader to read bytecode files.
398+
def path_stats(self, path):
399+
"""Optional method returning a metadata dict for the specified path
400+
to by the path (str).
401+
Possible keys:
402+
- 'mtime' (mandatory) is the numeric timestamp of last source
403+
code modification;
404+
- 'size' (optional) is the size in bytes of the source code.
382405
406+
Implementing this method allows the loader to read bytecode files.
383407
"""
384-
raise NotImplementedError
408+
return {'mtime': self.path_mtime(path)}
385409

386410
def set_data(self, path, data):
387411
"""Optional method which writes data (bytes) to a file path (a str).
@@ -407,7 +431,7 @@ def get_source(self, fullname):
407431
def get_code(self, fullname):
408432
"""Concrete implementation of InspectLoader.get_code.
409433
410-
Reading of bytecode requires path_mtime to be implemented. To write
434+
Reading of bytecode requires path_stats to be implemented. To write
411435
bytecode, set_data must also be implemented.
412436
413437
"""
@@ -416,18 +440,19 @@ def get_code(self, fullname):
416440
source_mtime = None
417441
if bytecode_path is not None:
418442
try:
419-
source_mtime = self.path_mtime(source_path)
443+
st = self.path_stats(source_path)
420444
except NotImplementedError:
421445
pass
422446
else:
447+
source_mtime = int(st['mtime'])
423448
try:
424449
data = self.get_data(bytecode_path)
425450
except IOError:
426451
pass
427452
else:
428453
try:
429454
bytes_data = self._bytes_from_bytecode(fullname, data,
430-
source_mtime)
455+
st)
431456
except (ImportError, EOFError):
432457
pass
433458
else:
@@ -448,6 +473,7 @@ def get_code(self, fullname):
448473
# throw an exception.
449474
data = bytearray(imp.get_magic())
450475
data.extend(marshal._w_long(source_mtime))
476+
data.extend(marshal._w_long(len(source_bytes)))
451477
data.extend(marshal.dumps(code_object))
452478
try:
453479
self.set_data(bytecode_path, data)
@@ -492,9 +518,10 @@ class _SourceFileLoader(_FileLoader, SourceLoader):
492518

493519
"""Concrete implementation of SourceLoader using the file system."""
494520

495-
def path_mtime(self, path):
496-
"""Return the modification time for the path."""
497-
return int(_os.stat(path).st_mtime)
521+
def path_stats(self, path):
522+
"""Return the metadat for the path."""
523+
st = _os.stat(path)
524+
return {'mtime': st.st_mtime, 'size': st.st_size}
498525

499526
def set_data(self, path, data):
500527
"""Write bytes data to a file."""

Lib/importlib/abc.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,20 @@ class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader):
123123

124124
def path_mtime(self, path):
125125
"""Return the (int) modification time for the path (str)."""
126-
raise NotImplementedError
126+
if self.path_stats.__func__ is SourceLoader.path_stats:
127+
raise NotImplementedError
128+
return int(self.path_stats(path)['mtime'])
129+
130+
def path_stats(self, path):
131+
"""Return a metadata dict for the source pointed to by the path (str).
132+
Possible keys:
133+
- 'mtime' (mandatory) is the numeric timestamp of last source
134+
code modification;
135+
- 'size' (optional) is the size in bytes of the source code.
136+
"""
137+
if self.path_mtime.__func__ is SourceLoader.path_mtime:
138+
raise NotImplementedError
139+
return {'mtime': self.path_mtime(path)}
127140

128141
def set_data(self, path, data):
129142
"""Write the bytes to the path (if possible).

Lib/importlib/test/source/test_abc_loader.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from .. import util
66
from . import util as source_util
77

8+
import collections
89
import imp
910
import inspect
1011
import io
@@ -40,8 +41,10 @@ class SourceLoaderMock(SourceOnlyLoaderMock):
4041
def __init__(self, path, magic=imp.get_magic()):
4142
super().__init__(path)
4243
self.bytecode_path = imp.cache_from_source(self.path)
44+
self.source_size = len(self.source)
4345
data = bytearray(magic)
4446
data.extend(marshal._w_long(self.source_mtime))
47+
data.extend(marshal._w_long(self.source_size))
4548
code_object = compile(self.source, self.path, 'exec',
4649
dont_inherit=True)
4750
data.extend(marshal.dumps(code_object))
@@ -56,9 +59,9 @@ def get_data(self, path):
5659
else:
5760
raise IOError
5861

59-
def path_mtime(self, path):
62+
def path_stats(self, path):
6063
assert path == self.path
61-
return self.source_mtime
64+
return {'mtime': self.source_mtime, 'size': self.source_size}
6265

6366
def set_data(self, path, data):
6467
self.written[path] = bytes(data)
@@ -657,6 +660,7 @@ def verify_code(self, code_object, *, bytecode_written=False):
657660
self.assertIn(self.cached, self.loader.written)
658661
data = bytearray(imp.get_magic())
659662
data.extend(marshal._w_long(self.loader.source_mtime))
663+
data.extend(marshal._w_long(self.loader.source_size))
660664
data.extend(marshal.dumps(code_object))
661665
self.assertEqual(self.loader.written[self.cached], bytes(data))
662666

@@ -847,7 +851,7 @@ def test_SourceLoader(self):
847851
# Required abstractmethods.
848852
self.raises_NotImplementedError(ins, 'get_filename', 'get_data')
849853
# Optional abstractmethods.
850-
self.raises_NotImplementedError(ins,'path_mtime', 'set_data')
854+
self.raises_NotImplementedError(ins,'path_stats', 'set_data')
851855

852856
def test_PyLoader(self):
853857
self.raises_NotImplementedError(self.PyLoader(), 'source_path',

Lib/importlib/test/source/test_file_loader.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,6 @@ def test_module_reuse(self):
7070
module_dict_id = id(module.__dict__)
7171
with open(mapping['_temp'], 'w') as file:
7272
file.write("testing_var = 42\n")
73-
# For filesystems where the mtime is only to a second granularity,
74-
# everything that has happened above can be too fast;
75-
# force an mtime on the source that is guaranteed to be different
76-
# than the original mtime.
77-
loader.path_mtime = self.fake_mtime(loader.path_mtime)
7873
module = loader.load_module('_temp')
7974
self.assertTrue('testing_var' in module.__dict__,
8075
"'testing_var' not in "
@@ -190,10 +185,17 @@ def _test_partial_timestamp(self, test, *, del_source=False):
190185
del_source=del_source)
191186
test('_temp', mapping, bc_path)
192187

188+
def _test_partial_size(self, test, *, del_source=False):
189+
with source_util.create_modules('_temp') as mapping:
190+
bc_path = self.manipulate_bytecode('_temp', mapping,
191+
lambda bc: bc[:11],
192+
del_source=del_source)
193+
test('_temp', mapping, bc_path)
194+
193195
def _test_no_marshal(self, *, del_source=False):
194196
with source_util.create_modules('_temp') as mapping:
195197
bc_path = self.manipulate_bytecode('_temp', mapping,
196-
lambda bc: bc[:8],
198+
lambda bc: bc[:12],
197199
del_source=del_source)
198200
file_path = mapping['_temp'] if not del_source else bc_path
199201
with self.assertRaises(EOFError):
@@ -202,7 +204,7 @@ def _test_no_marshal(self, *, del_source=False):
202204
def _test_non_code_marshal(self, *, del_source=False):
203205
with source_util.create_modules('_temp') as mapping:
204206
bytecode_path = self.manipulate_bytecode('_temp', mapping,
205-
lambda bc: bc[:8] + marshal.dumps(b'abcd'),
207+
lambda bc: bc[:12] + marshal.dumps(b'abcd'),
206208
del_source=del_source)
207209
file_path = mapping['_temp'] if not del_source else bytecode_path
208210
with self.assertRaises(ImportError):
@@ -211,7 +213,7 @@ def _test_non_code_marshal(self, *, del_source=False):
211213
def _test_bad_marshal(self, *, del_source=False):
212214
with source_util.create_modules('_temp') as mapping:
213215
bytecode_path = self.manipulate_bytecode('_temp', mapping,
214-
lambda bc: bc[:8] + b'<test>',
216+
lambda bc: bc[:12] + b'<test>',
215217
del_source=del_source)
216218
file_path = mapping['_temp'] if not del_source else bytecode_path
217219
with self.assertRaises(EOFError):
@@ -235,15 +237,15 @@ def test_empty_file(self):
235237
def test(name, mapping, bytecode_path):
236238
self.import_(mapping[name], name)
237239
with open(bytecode_path, 'rb') as file:
238-
self.assertGreater(len(file.read()), 8)
240+
self.assertGreater(len(file.read()), 12)
239241

240242
self._test_empty_file(test)
241243

242244
def test_partial_magic(self):
243245
def test(name, mapping, bytecode_path):
244246
self.import_(mapping[name], name)
245247
with open(bytecode_path, 'rb') as file:
246-
self.assertGreater(len(file.read()), 8)
248+
self.assertGreater(len(file.read()), 12)
247249

248250
self._test_partial_magic(test)
249251

@@ -254,7 +256,7 @@ def test_magic_only(self):
254256
def test(name, mapping, bytecode_path):
255257
self.import_(mapping[name], name)
256258
with open(bytecode_path, 'rb') as file:
257-
self.assertGreater(len(file.read()), 8)
259+
self.assertGreater(len(file.read()), 12)
258260

259261
self._test_magic_only(test)
260262

@@ -276,10 +278,21 @@ def test_partial_timestamp(self):
276278
def test(name, mapping, bc_path):
277279
self.import_(mapping[name], name)
278280
with open(bc_path, 'rb') as file:
279-
self.assertGreater(len(file.read()), 8)
281+
self.assertGreater(len(file.read()), 12)
280282

281283
self._test_partial_timestamp(test)
282284

285+
@source_util.writes_bytecode_files
286+
def test_partial_size(self):
287+
# When the size is partial, regenerate the .pyc, else
288+
# raise EOFError.
289+
def test(name, mapping, bc_path):
290+
self.import_(mapping[name], name)
291+
with open(bc_path, 'rb') as file:
292+
self.assertGreater(len(file.read()), 12)
293+
294+
self._test_partial_size(test)
295+
283296
@source_util.writes_bytecode_files
284297
def test_no_marshal(self):
285298
# When there is only the magic number and timestamp, raise EOFError.
@@ -375,6 +388,13 @@ def test(name, mapping, bytecode_path):
375388

376389
self._test_partial_timestamp(test, del_source=True)
377390

391+
def test_partial_size(self):
392+
def test(name, mapping, bytecode_path):
393+
with self.assertRaises(EOFError):
394+
self.import_(bytecode_path, name)
395+
396+
self._test_partial_size(test, del_source=True)
397+
378398
def test_no_marshal(self):
379399
self._test_no_marshal(del_source=True)
380400

Lib/pkgutil.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def read_code(stream):
2121
if magic != imp.get_magic():
2222
return None
2323

24-
stream.read(4) # Skip timestamp
24+
stream.read(8) # Skip timestamp and size
2525
return marshal.load(stream)
2626

2727

Lib/py_compile.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,11 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
110110
"""
111111
with tokenize.open(file) as f:
112112
try:
113-
timestamp = int(os.fstat(f.fileno()).st_mtime)
113+
st = os.fstat(f.fileno())
114114
except AttributeError:
115-
timestamp = int(os.stat(file).st_mtime)
115+
st = os.stat(file)
116+
timestamp = int(st.st_mtime)
117+
size = st.st_size & 0xFFFFFFFF
116118
codestring = f.read()
117119
try:
118120
codeobject = builtins.compile(codestring, dfile or file, 'exec',
@@ -139,6 +141,7 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
139141
with open(cfile, 'wb') as fc:
140142
fc.write(b'\0\0\0\0')
141143
wr_long(fc, timestamp)
144+
wr_long(fc, size)
142145
marshal.dump(codeobject, fc)
143146
fc.flush()
144147
fc.seek(0, 0)

Lib/test/test_import.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ def test_module_without_source(self):
380380
def test_foreign_code(self):
381381
py_compile.compile(self.file_name)
382382
with open(self.compiled_name, "rb") as f:
383-
header = f.read(8)
383+
header = f.read(12)
384384
code = marshal.load(f)
385385
constants = list(code.co_consts)
386386
foreign_code = test_main.__code__
@@ -644,6 +644,16 @@ def cleanup():
644644
self.assertEqual(sys.modules['pep3147.foo'].__cached__,
645645
os.path.join(os.curdir, foo_pyc))
646646

647+
def test_recompute_pyc_same_second(self):
648+
# Even when the source file doesn't change timestamp, a change in
649+
# source size is enough to trigger recomputation of the pyc file.
650+
__import__(TESTFN)
651+
unload(TESTFN)
652+
with open(self.source, 'a') as fp:
653+
print("x = 5", file=fp)
654+
m = __import__(TESTFN)
655+
self.assertEqual(m.x, 5)
656+
647657

648658
class RelativeImportFromImportlibTests(test_relative_imports.RelativeImports):
649659

0 commit comments

Comments
 (0)