Skip to content

Commit acbd3f9

Browse files
encukouvstinnerfrenzymadness
authored
gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846)
Co-authored-by: Victor Stinner <vstinner@python.org> Co-authored-by: Lumír 'Frenzy' Balhar <frenzy.madness@gmail.com>
1 parent 622ddc4 commit acbd3f9

File tree

4 files changed

+156
-9
lines changed

4 files changed

+156
-9
lines changed

Doc/library/tarfile.rst

+5
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,11 @@ A ``TarInfo`` object has the following public data attributes:
740740
Name of the target file name, which is only present in :class:`TarInfo` objects
741741
of type :const:`LNKTYPE` and :const:`SYMTYPE`.
742742

743+
For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory
744+
that contains the link.
745+
For hard links (``LNKTYPE``), the *linkname* is relative to the root of
746+
the archive.
747+
743748

744749
.. attribute:: TarInfo.uid
745750
:type: int

Lib/tarfile.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -742,7 +742,7 @@ def __init__(self, tarinfo):
742742
class AbsoluteLinkError(FilterError):
743743
def __init__(self, tarinfo):
744744
self.tarinfo = tarinfo
745-
super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
745+
super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
746746

747747
class LinkOutsideDestinationError(FilterError):
748748
def __init__(self, tarinfo, path):
@@ -802,7 +802,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
802802
if member.islnk() or member.issym():
803803
if os.path.isabs(member.linkname):
804804
raise AbsoluteLinkError(member)
805-
target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
805+
if member.issym():
806+
target_path = os.path.join(dest_path,
807+
os.path.dirname(name),
808+
member.linkname)
809+
else:
810+
target_path = os.path.join(dest_path,
811+
member.linkname)
812+
target_path = os.path.realpath(target_path)
806813
if os.path.commonpath([target_path, dest_path]) != dest_path:
807814
raise LinkOutsideDestinationError(member, target_path)
808815
return new_attrs

Lib/test/test_tarfile.py

+139-7
Original file line numberDiff line numberDiff line change
@@ -3337,10 +3337,12 @@ def __exit__(self, *exc):
33373337
self.bio = None
33383338

33393339
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
3340-
mode=None, **kwargs):
3340+
mode=None, size=None, **kwargs):
33413341
"""Add a member to the test archive. Call within `with`."""
33423342
name = str(name)
33433343
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
3344+
if size is not None:
3345+
tarinfo.size = size
33443346
if mode:
33453347
tarinfo.mode = _filemode_to_int(mode)
33463348
if symlink_to is not None:
@@ -3416,7 +3418,8 @@ def check_context(self, tar, filter):
34163418
raise self.raised_exception
34173419
self.assertEqual(self.expected_paths, set())
34183420

3419-
def expect_file(self, name, type=None, symlink_to=None, mode=None):
3421+
def expect_file(self, name, type=None, symlink_to=None, mode=None,
3422+
size=None):
34203423
"""Check a single file. See check_context."""
34213424
if self.raised_exception:
34223425
raise self.raised_exception
@@ -3445,6 +3448,8 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None):
34453448
self.assertTrue(path.is_fifo())
34463449
else:
34473450
raise NotImplementedError(type)
3451+
if size is not None:
3452+
self.assertEqual(path.stat().st_size, size)
34483453
for parent in path.parents:
34493454
self.expected_paths.discard(parent)
34503455

@@ -3491,8 +3496,15 @@ def test_parent_symlink(self):
34913496
# Test interplaying symlinks
34923497
# Inspired by 'dirsymlink2a' in jwilk/traversal-archives
34933498
with ArchiveMaker() as arc:
3499+
3500+
# `current` links to `.` which is both:
3501+
# - the destination directory
3502+
# - `current` itself
34943503
arc.add('current', symlink_to='.')
3504+
3505+
# effectively points to ./../
34953506
arc.add('parent', symlink_to='current/..')
3507+
34963508
arc.add('parent/evil')
34973509

34983510
if os_helper.can_symlink():
@@ -3534,9 +3546,46 @@ def test_parent_symlink(self):
35343546
def test_parent_symlink2(self):
35353547
# Test interplaying symlinks
35363548
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
3549+
3550+
# Posix and Windows have different pathname resolution:
3551+
# either symlink or a '..' component resolve first.
3552+
# Let's see which we are on.
3553+
if os_helper.can_symlink():
3554+
testpath = os.path.join(TEMPDIR, 'resolution_test')
3555+
os.mkdir(testpath)
3556+
3557+
# testpath/current links to `.` which is all of:
3558+
# - `testpath`
3559+
# - `testpath/current`
3560+
# - `testpath/current/current`
3561+
# - etc.
3562+
os.symlink('.', os.path.join(testpath, 'current'))
3563+
3564+
# we'll test where `testpath/current/../file` ends up
3565+
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3566+
pass
3567+
3568+
if os.path.exists(os.path.join(testpath, 'file')):
3569+
# Windows collapses 'current\..' to '.' first, leaving
3570+
# 'testpath\file'
3571+
dotdot_resolves_early = True
3572+
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3573+
# Posix resolves 'current' to '.' first, leaving
3574+
# 'testpath/../file'
3575+
dotdot_resolves_early = False
3576+
else:
3577+
raise AssertionError('Could not determine link resolution')
3578+
35373579
with ArchiveMaker() as arc:
3580+
3581+
# `current` links to `.` which is both the destination directory
3582+
# and `current` itself
35383583
arc.add('current', symlink_to='.')
3584+
3585+
# `current/parent` is also available as `./parent`,
3586+
# and effectively points to `./../`
35393587
arc.add('current/parent', symlink_to='..')
3588+
35403589
arc.add('parent/evil')
35413590

35423591
with self.check_context(arc.open(), 'fully_trusted'):
@@ -3550,6 +3599,7 @@ def test_parent_symlink2(self):
35503599

35513600
with self.check_context(arc.open(), 'tar'):
35523601
if os_helper.can_symlink():
3602+
# Fail when extracting a file outside destination
35533603
self.expect_exception(
35543604
tarfile.OutsideDestinationError,
35553605
"'parent/evil' would be extracted to "
@@ -3560,10 +3610,24 @@ def test_parent_symlink2(self):
35603610
self.expect_file('parent/evil')
35613611

35623612
with self.check_context(arc.open(), 'data'):
3563-
self.expect_exception(
3564-
tarfile.LinkOutsideDestinationError,
3565-
"""'current/parent' would link to ['"].*['"], """
3566-
+ "which is outside the destination")
3613+
if os_helper.can_symlink():
3614+
if dotdot_resolves_early:
3615+
# Fail when extracting a file outside destination
3616+
self.expect_exception(
3617+
tarfile.OutsideDestinationError,
3618+
"'parent/evil' would be extracted to "
3619+
+ """['"].*evil['"], which is outside """
3620+
+ "the destination")
3621+
else:
3622+
# Fail as soon as we have a symlink outside the destination
3623+
self.expect_exception(
3624+
tarfile.LinkOutsideDestinationError,
3625+
"'current/parent' would link to "
3626+
+ """['"].*outerdir['"], which is outside """
3627+
+ "the destination")
3628+
else:
3629+
self.expect_file('current/')
3630+
self.expect_file('parent/evil')
35673631

35683632
@symlink_test
35693633
def test_absolute_symlink(self):
@@ -3593,12 +3657,30 @@ def test_absolute_symlink(self):
35933657
with self.check_context(arc.open(), 'data'):
35943658
self.expect_exception(
35953659
tarfile.AbsoluteLinkError,
3596-
"'parent' is a symlink to an absolute path")
3660+
"'parent' is a link to an absolute path")
3661+
3662+
def test_absolute_hardlink(self):
3663+
# Test hardlink to an absolute path
3664+
# Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
3665+
with ArchiveMaker() as arc:
3666+
arc.add('parent', hardlink_to=self.outerdir / 'foo')
3667+
3668+
with self.check_context(arc.open(), 'fully_trusted'):
3669+
self.expect_exception(KeyError, ".*foo. not found")
3670+
3671+
with self.check_context(arc.open(), 'tar'):
3672+
self.expect_exception(KeyError, ".*foo. not found")
3673+
3674+
with self.check_context(arc.open(), 'data'):
3675+
self.expect_exception(
3676+
tarfile.AbsoluteLinkError,
3677+
"'parent' is a link to an absolute path")
35973678

35983679
@symlink_test
35993680
def test_sly_relative0(self):
36003681
# Inspired by 'relative0' in jwilk/traversal-archives
36013682
with ArchiveMaker() as arc:
3683+
# points to `../../tmp/moo`
36023684
arc.add('../moo', symlink_to='..//tmp/moo')
36033685

36043686
try:
@@ -3649,6 +3731,56 @@ def test_sly_relative2(self):
36493731
+ """['"].*moo['"], which is outside the """
36503732
+ "destination")
36513733

3734+
@symlink_test
3735+
def test_deep_symlink(self):
3736+
# Test that symlinks and hardlinks inside a directory
3737+
# point to the correct file (`target` of size 3).
3738+
# If links aren't supported we get a copy of the file.
3739+
with ArchiveMaker() as arc:
3740+
arc.add('targetdir/target', size=3)
3741+
# a hardlink's linkname is relative to the archive
3742+
arc.add('linkdir/hardlink', hardlink_to=os.path.join(
3743+
'targetdir', 'target'))
3744+
# a symlink's linkname is relative to the link's directory
3745+
arc.add('linkdir/symlink', symlink_to=os.path.join(
3746+
'..', 'targetdir', 'target'))
3747+
3748+
for filter in 'tar', 'data', 'fully_trusted':
3749+
with self.check_context(arc.open(), filter):
3750+
self.expect_file('targetdir/target', size=3)
3751+
self.expect_file('linkdir/hardlink', size=3)
3752+
if os_helper.can_symlink():
3753+
self.expect_file('linkdir/symlink', size=3,
3754+
symlink_to='../targetdir/target')
3755+
else:
3756+
self.expect_file('linkdir/symlink', size=3)
3757+
3758+
@symlink_test
3759+
def test_chains(self):
3760+
# Test chaining of symlinks/hardlinks.
3761+
# Symlinks are created before the files they point to.
3762+
with ArchiveMaker() as arc:
3763+
arc.add('linkdir/symlink', symlink_to='hardlink')
3764+
arc.add('symlink2', symlink_to=os.path.join(
3765+
'linkdir', 'hardlink2'))
3766+
arc.add('targetdir/target', size=3)
3767+
arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
3768+
arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
3769+
3770+
for filter in 'tar', 'data', 'fully_trusted':
3771+
with self.check_context(arc.open(), filter):
3772+
self.expect_file('targetdir/target', size=3)
3773+
self.expect_file('linkdir/hardlink', size=3)
3774+
self.expect_file('linkdir/hardlink2', size=3)
3775+
if os_helper.can_symlink():
3776+
self.expect_file('linkdir/symlink', size=3,
3777+
symlink_to='hardlink')
3778+
self.expect_file('symlink2', size=3,
3779+
symlink_to='linkdir/hardlink2')
3780+
else:
3781+
self.expect_file('linkdir/symlink', size=3)
3782+
self.expect_file('symlink2', size=3)
3783+
36523784
def test_modes(self):
36533785
# Test how file modes are extracted
36543786
# (Note that the modes are ignored on platforms without working chmod)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`tarfile.data_filter` now takes the location of symlinks into account
2+
when determining their target, so it will no longer reject some valid
3+
tarballs with ``LinkOutsideDestinationError``.

0 commit comments

Comments
 (0)