Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit acbd3f9

Browse filesBrowse files
encukouvstinnerfrenzymadness
authored
gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846)
Co-authored-by: Victor Stinner <vstinner@python.org> Co-authored-by: Lumír 'Frenzy' Balhar <frenzy.madness@gmail.com>
1 parent 622ddc4 commit acbd3f9
Copy full SHA for acbd3f9

File tree

Expand file treeCollapse file tree

4 files changed

+156
-9
lines changed
Filter options
Expand file treeCollapse file tree

4 files changed

+156
-9
lines changed

‎Doc/library/tarfile.rst

Copy file name to clipboardExpand all lines: Doc/library/tarfile.rst
+5Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,11 @@ A ``TarInfo`` object has the following public data attributes:
740740
Name of the target file name, which is only present in :class:`TarInfo` objects
741741
of type :const:`LNKTYPE` and :const:`SYMTYPE`.
742742

743+
For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory
744+
that contains the link.
745+
For hard links (``LNKTYPE``), the *linkname* is relative to the root of
746+
the archive.
747+
743748

744749
.. attribute:: TarInfo.uid
745750
:type: int

‎Lib/tarfile.py

Copy file name to clipboardExpand all lines: Lib/tarfile.py
+9-2Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -742,7 +742,7 @@ def __init__(self, tarinfo):
742742
class AbsoluteLinkError(FilterError):
743743
def __init__(self, tarinfo):
744744
self.tarinfo = tarinfo
745-
super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
745+
super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
746746

747747
class LinkOutsideDestinationError(FilterError):
748748
def __init__(self, tarinfo, path):
@@ -802,7 +802,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
802802
if member.islnk() or member.issym():
803803
if os.path.isabs(member.linkname):
804804
raise AbsoluteLinkError(member)
805-
target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
805+
if member.issym():
806+
target_path = os.path.join(dest_path,
807+
os.path.dirname(name),
808+
member.linkname)
809+
else:
810+
target_path = os.path.join(dest_path,
811+
member.linkname)
812+
target_path = os.path.realpath(target_path)
806813
if os.path.commonpath([target_path, dest_path]) != dest_path:
807814
raise LinkOutsideDestinationError(member, target_path)
808815
return new_attrs

‎Lib/test/test_tarfile.py

Copy file name to clipboardExpand all lines: Lib/test/test_tarfile.py
+139-7Lines changed: 139 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3337,10 +3337,12 @@ def __exit__(self, *exc):
33373337
self.bio = None
33383338

33393339
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
3340-
mode=None, **kwargs):
3340+
mode=None, size=None, **kwargs):
33413341
"""Add a member to the test archive. Call within `with`."""
33423342
name = str(name)
33433343
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
3344+
if size is not None:
3345+
tarinfo.size = size
33443346
if mode:
33453347
tarinfo.mode = _filemode_to_int(mode)
33463348
if symlink_to is not None:
@@ -3416,7 +3418,8 @@ def check_context(self, tar, filter):
34163418
raise self.raised_exception
34173419
self.assertEqual(self.expected_paths, set())
34183420

3419-
def expect_file(self, name, type=None, symlink_to=None, mode=None):
3421+
def expect_file(self, name, type=None, symlink_to=None, mode=None,
3422+
size=None):
34203423
"""Check a single file. See check_context."""
34213424
if self.raised_exception:
34223425
raise self.raised_exception
@@ -3445,6 +3448,8 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None):
34453448
self.assertTrue(path.is_fifo())
34463449
else:
34473450
raise NotImplementedError(type)
3451+
if size is not None:
3452+
self.assertEqual(path.stat().st_size, size)
34483453
for parent in path.parents:
34493454
self.expected_paths.discard(parent)
34503455

@@ -3491,8 +3496,15 @@ def test_parent_symlink(self):
34913496
# Test interplaying symlinks
34923497
# Inspired by 'dirsymlink2a' in jwilk/traversal-archives
34933498
with ArchiveMaker() as arc:
3499+
3500+
# `current` links to `.` which is both:
3501+
# - the destination directory
3502+
# - `current` itself
34943503
arc.add('current', symlink_to='.')
3504+
3505+
# effectively points to ./../
34953506
arc.add('parent', symlink_to='current/..')
3507+
34963508
arc.add('parent/evil')
34973509

34983510
if os_helper.can_symlink():
@@ -3534,9 +3546,46 @@ def test_parent_symlink(self):
35343546
def test_parent_symlink2(self):
35353547
# Test interplaying symlinks
35363548
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
3549+
3550+
# Posix and Windows have different pathname resolution:
3551+
# either symlink or a '..' component resolve first.
3552+
# Let's see which we are on.
3553+
if os_helper.can_symlink():
3554+
testpath = os.path.join(TEMPDIR, 'resolution_test')
3555+
os.mkdir(testpath)
3556+
3557+
# testpath/current links to `.` which is all of:
3558+
# - `testpath`
3559+
# - `testpath/current`
3560+
# - `testpath/current/current`
3561+
# - etc.
3562+
os.symlink('.', os.path.join(testpath, 'current'))
3563+
3564+
# we'll test where `testpath/current/../file` ends up
3565+
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3566+
pass
3567+
3568+
if os.path.exists(os.path.join(testpath, 'file')):
3569+
# Windows collapses 'current\..' to '.' first, leaving
3570+
# 'testpath\file'
3571+
dotdot_resolves_early = True
3572+
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3573+
# Posix resolves 'current' to '.' first, leaving
3574+
# 'testpath/../file'
3575+
dotdot_resolves_early = False
3576+
else:
3577+
raise AssertionError('Could not determine link resolution')
3578+
35373579
with ArchiveMaker() as arc:
3580+
3581+
# `current` links to `.` which is both the destination directory
3582+
# and `current` itself
35383583
arc.add('current', symlink_to='.')
3584+
3585+
# `current/parent` is also available as `./parent`,
3586+
# and effectively points to `./../`
35393587
arc.add('current/parent', symlink_to='..')
3588+
35403589
arc.add('parent/evil')
35413590

35423591
with self.check_context(arc.open(), 'fully_trusted'):
@@ -3550,6 +3599,7 @@ def test_parent_symlink2(self):
35503599

35513600
with self.check_context(arc.open(), 'tar'):
35523601
if os_helper.can_symlink():
3602+
# Fail when extracting a file outside destination
35533603
self.expect_exception(
35543604
tarfile.OutsideDestinationError,
35553605
"'parent/evil' would be extracted to "
@@ -3560,10 +3610,24 @@ def test_parent_symlink2(self):
35603610
self.expect_file('parent/evil')
35613611

35623612
with self.check_context(arc.open(), 'data'):
3563-
self.expect_exception(
3564-
tarfile.LinkOutsideDestinationError,
3565-
"""'current/parent' would link to ['"].*['"], """
3566-
+ "which is outside the destination")
3613+
if os_helper.can_symlink():
3614+
if dotdot_resolves_early:
3615+
# Fail when extracting a file outside destination
3616+
self.expect_exception(
3617+
tarfile.OutsideDestinationError,
3618+
"'parent/evil' would be extracted to "
3619+
+ """['"].*evil['"], which is outside """
3620+
+ "the destination")
3621+
else:
3622+
# Fail as soon as we have a symlink outside the destination
3623+
self.expect_exception(
3624+
tarfile.LinkOutsideDestinationError,
3625+
"'current/parent' would link to "
3626+
+ """['"].*outerdir['"], which is outside """
3627+
+ "the destination")
3628+
else:
3629+
self.expect_file('current/')
3630+
self.expect_file('parent/evil')
35673631

35683632
@symlink_test
35693633
def test_absolute_symlink(self):
@@ -3593,12 +3657,30 @@ def test_absolute_symlink(self):
35933657
with self.check_context(arc.open(), 'data'):
35943658
self.expect_exception(
35953659
tarfile.AbsoluteLinkError,
3596-
"'parent' is a symlink to an absolute path")
3660+
"'parent' is a link to an absolute path")
3661+
3662+
def test_absolute_hardlink(self):
3663+
# Test hardlink to an absolute path
3664+
# Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
3665+
with ArchiveMaker() as arc:
3666+
arc.add('parent', hardlink_to=self.outerdir / 'foo')
3667+
3668+
with self.check_context(arc.open(), 'fully_trusted'):
3669+
self.expect_exception(KeyError, ".*foo. not found")
3670+
3671+
with self.check_context(arc.open(), 'tar'):
3672+
self.expect_exception(KeyError, ".*foo. not found")
3673+
3674+
with self.check_context(arc.open(), 'data'):
3675+
self.expect_exception(
3676+
tarfile.AbsoluteLinkError,
3677+
"'parent' is a link to an absolute path")
35973678

35983679
@symlink_test
35993680
def test_sly_relative0(self):
36003681
# Inspired by 'relative0' in jwilk/traversal-archives
36013682
with ArchiveMaker() as arc:
3683+
# points to `../../tmp/moo`
36023684
arc.add('../moo', symlink_to='..//tmp/moo')
36033685

36043686
try:
@@ -3649,6 +3731,56 @@ def test_sly_relative2(self):
36493731
+ """['"].*moo['"], which is outside the """
36503732
+ "destination")
36513733

3734+
@symlink_test
3735+
def test_deep_symlink(self):
3736+
# Test that symlinks and hardlinks inside a directory
3737+
# point to the correct file (`target` of size 3).
3738+
# If links aren't supported we get a copy of the file.
3739+
with ArchiveMaker() as arc:
3740+
arc.add('targetdir/target', size=3)
3741+
# a hardlink's linkname is relative to the archive
3742+
arc.add('linkdir/hardlink', hardlink_to=os.path.join(
3743+
'targetdir', 'target'))
3744+
# a symlink's linkname is relative to the link's directory
3745+
arc.add('linkdir/symlink', symlink_to=os.path.join(
3746+
'..', 'targetdir', 'target'))
3747+
3748+
for filter in 'tar', 'data', 'fully_trusted':
3749+
with self.check_context(arc.open(), filter):
3750+
self.expect_file('targetdir/target', size=3)
3751+
self.expect_file('linkdir/hardlink', size=3)
3752+
if os_helper.can_symlink():
3753+
self.expect_file('linkdir/symlink', size=3,
3754+
symlink_to='../targetdir/target')
3755+
else:
3756+
self.expect_file('linkdir/symlink', size=3)
3757+
3758+
@symlink_test
3759+
def test_chains(self):
3760+
# Test chaining of symlinks/hardlinks.
3761+
# Symlinks are created before the files they point to.
3762+
with ArchiveMaker() as arc:
3763+
arc.add('linkdir/symlink', symlink_to='hardlink')
3764+
arc.add('symlink2', symlink_to=os.path.join(
3765+
'linkdir', 'hardlink2'))
3766+
arc.add('targetdir/target', size=3)
3767+
arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
3768+
arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
3769+
3770+
for filter in 'tar', 'data', 'fully_trusted':
3771+
with self.check_context(arc.open(), filter):
3772+
self.expect_file('targetdir/target', size=3)
3773+
self.expect_file('linkdir/hardlink', size=3)
3774+
self.expect_file('linkdir/hardlink2', size=3)
3775+
if os_helper.can_symlink():
3776+
self.expect_file('linkdir/symlink', size=3,
3777+
symlink_to='hardlink')
3778+
self.expect_file('symlink2', size=3,
3779+
symlink_to='linkdir/hardlink2')
3780+
else:
3781+
self.expect_file('linkdir/symlink', size=3)
3782+
self.expect_file('symlink2', size=3)
3783+
36523784
def test_modes(self):
36533785
# Test how file modes are extracted
36543786
# (Note that the modes are ignored on platforms without working chmod)
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`tarfile.data_filter` now takes the location of symlinks into account
2+
when determining their target, so it will no longer reject some valid
3+
tarballs with ``LinkOutsideDestinationError``.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.