Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e79384c

Browse filesBrowse files
encukouvstinnerfrenzymadness
authored andcommitted
gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846)
(cherry picked from commit acbd3f9) Co-authored-by: Petr Viktorin <encukou@gmail.com> Co-authored-by: Victor Stinner <vstinner@python.org> Co-authored-by: Lumír 'Frenzy' Balhar <frenzy.madness@gmail.com>
1 parent be99653 commit e79384c
Copy full SHA for e79384c

File tree

Expand file treeCollapse file tree

4 files changed

+156
-9
lines changed
Filter options
Expand file treeCollapse file tree

4 files changed

+156
-9
lines changed

‎Doc/library/tarfile.rst

Copy file name to clipboardExpand all lines: Doc/library/tarfile.rst
+5Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,11 @@ A ``TarInfo`` object has the following public data attributes:
732732
Name of the target file name, which is only present in :class:`TarInfo` objects
733733
of type :const:`LNKTYPE` and :const:`SYMTYPE`.
734734

735+
For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory
736+
that contains the link.
737+
For hard links (``LNKTYPE``), the *linkname* is relative to the root of
738+
the archive.
739+
735740

736741
.. attribute:: TarInfo.uid
737742
:type: int

‎Lib/tarfile.py

Copy file name to clipboardExpand all lines: Lib/tarfile.py
+9-2Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,7 @@ def __init__(self, tarinfo):
741741
class AbsoluteLinkError(FilterError):
742742
def __init__(self, tarinfo):
743743
self.tarinfo = tarinfo
744-
super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
744+
super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
745745

746746
class LinkOutsideDestinationError(FilterError):
747747
def __init__(self, tarinfo, path):
@@ -801,7 +801,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
801801
if member.islnk() or member.issym():
802802
if os.path.isabs(member.linkname):
803803
raise AbsoluteLinkError(member)
804-
target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
804+
if member.issym():
805+
target_path = os.path.join(dest_path,
806+
os.path.dirname(name),
807+
member.linkname)
808+
else:
809+
target_path = os.path.join(dest_path,
810+
member.linkname)
811+
target_path = os.path.realpath(target_path)
805812
if os.path.commonpath([target_path, dest_path]) != dest_path:
806813
raise LinkOutsideDestinationError(member, target_path)
807814
return new_attrs

‎Lib/test/test_tarfile.py

Copy file name to clipboardExpand all lines: Lib/test/test_tarfile.py
+139-7Lines changed: 139 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3239,10 +3239,12 @@ def __exit__(self, *exc):
32393239
self.bio = None
32403240

32413241
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
3242-
mode=None, **kwargs):
3242+
mode=None, size=None, **kwargs):
32433243
"""Add a member to the test archive. Call within `with`."""
32443244
name = str(name)
32453245
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
3246+
if size is not None:
3247+
tarinfo.size = size
32463248
if mode:
32473249
tarinfo.mode = _filemode_to_int(mode)
32483250
if symlink_to is not None:
@@ -3318,7 +3320,8 @@ def check_context(self, tar, filter):
33183320
raise self.raised_exception
33193321
self.assertEqual(self.expected_paths, set())
33203322

3321-
def expect_file(self, name, type=None, symlink_to=None, mode=None):
3323+
def expect_file(self, name, type=None, symlink_to=None, mode=None,
3324+
size=None):
33223325
"""Check a single file. See check_context."""
33233326
if self.raised_exception:
33243327
raise self.raised_exception
@@ -3347,6 +3350,8 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None):
33473350
self.assertTrue(path.is_fifo())
33483351
else:
33493352
raise NotImplementedError(type)
3353+
if size is not None:
3354+
self.assertEqual(path.stat().st_size, size)
33503355
for parent in path.parents:
33513356
self.expected_paths.discard(parent)
33523357

@@ -3393,8 +3398,15 @@ def test_parent_symlink(self):
33933398
# Test interplaying symlinks
33943399
# Inspired by 'dirsymlink2a' in jwilk/traversal-archives
33953400
with ArchiveMaker() as arc:
3401+
3402+
# `current` links to `.` which is both:
3403+
# - the destination directory
3404+
# - `current` itself
33963405
arc.add('current', symlink_to='.')
3406+
3407+
# effectively points to ./../
33973408
arc.add('parent', symlink_to='current/..')
3409+
33983410
arc.add('parent/evil')
33993411

34003412
if os_helper.can_symlink():
@@ -3436,9 +3448,46 @@ def test_parent_symlink(self):
34363448
def test_parent_symlink2(self):
34373449
# Test interplaying symlinks
34383450
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
3451+
3452+
# Posix and Windows have different pathname resolution:
3453+
# either symlink or a '..' component resolve first.
3454+
# Let's see which we are on.
3455+
if os_helper.can_symlink():
3456+
testpath = os.path.join(TEMPDIR, 'resolution_test')
3457+
os.mkdir(testpath)
3458+
3459+
# testpath/current links to `.` which is all of:
3460+
# - `testpath`
3461+
# - `testpath/current`
3462+
# - `testpath/current/current`
3463+
# - etc.
3464+
os.symlink('.', os.path.join(testpath, 'current'))
3465+
3466+
# we'll test where `testpath/current/../file` ends up
3467+
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3468+
pass
3469+
3470+
if os.path.exists(os.path.join(testpath, 'file')):
3471+
# Windows collapses 'current\..' to '.' first, leaving
3472+
# 'testpath\file'
3473+
dotdot_resolves_early = True
3474+
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3475+
# Posix resolves 'current' to '.' first, leaving
3476+
# 'testpath/../file'
3477+
dotdot_resolves_early = False
3478+
else:
3479+
raise AssertionError('Could not determine link resolution')
3480+
34393481
with ArchiveMaker() as arc:
3482+
3483+
# `current` links to `.` which is both the destination directory
3484+
# and `current` itself
34403485
arc.add('current', symlink_to='.')
3486+
3487+
# `current/parent` is also available as `./parent`,
3488+
# and effectively points to `./../`
34413489
arc.add('current/parent', symlink_to='..')
3490+
34423491
arc.add('parent/evil')
34433492

34443493
with self.check_context(arc.open(), 'fully_trusted'):
@@ -3452,6 +3501,7 @@ def test_parent_symlink2(self):
34523501

34533502
with self.check_context(arc.open(), 'tar'):
34543503
if os_helper.can_symlink():
3504+
# Fail when extracting a file outside destination
34553505
self.expect_exception(
34563506
tarfile.OutsideDestinationError,
34573507
"'parent/evil' would be extracted to "
@@ -3462,10 +3512,24 @@ def test_parent_symlink2(self):
34623512
self.expect_file('parent/evil')
34633513

34643514
with self.check_context(arc.open(), 'data'):
3465-
self.expect_exception(
3466-
tarfile.LinkOutsideDestinationError,
3467-
"""'current/parent' would link to ['"].*['"], """
3468-
+ "which is outside the destination")
3515+
if os_helper.can_symlink():
3516+
if dotdot_resolves_early:
3517+
# Fail when extracting a file outside destination
3518+
self.expect_exception(
3519+
tarfile.OutsideDestinationError,
3520+
"'parent/evil' would be extracted to "
3521+
+ """['"].*evil['"], which is outside """
3522+
+ "the destination")
3523+
else:
3524+
# Fail as soon as we have a symlink outside the destination
3525+
self.expect_exception(
3526+
tarfile.LinkOutsideDestinationError,
3527+
"'current/parent' would link to "
3528+
+ """['"].*outerdir['"], which is outside """
3529+
+ "the destination")
3530+
else:
3531+
self.expect_file('current/')
3532+
self.expect_file('parent/evil')
34693533

34703534
@symlink_test
34713535
def test_absolute_symlink(self):
@@ -3495,12 +3559,30 @@ def test_absolute_symlink(self):
34953559
with self.check_context(arc.open(), 'data'):
34963560
self.expect_exception(
34973561
tarfile.AbsoluteLinkError,
3498-
"'parent' is a symlink to an absolute path")
3562+
"'parent' is a link to an absolute path")
3563+
3564+
def test_absolute_hardlink(self):
3565+
# Test hardlink to an absolute path
3566+
# Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
3567+
with ArchiveMaker() as arc:
3568+
arc.add('parent', hardlink_to=self.outerdir / 'foo')
3569+
3570+
with self.check_context(arc.open(), 'fully_trusted'):
3571+
self.expect_exception(KeyError, ".*foo. not found")
3572+
3573+
with self.check_context(arc.open(), 'tar'):
3574+
self.expect_exception(KeyError, ".*foo. not found")
3575+
3576+
with self.check_context(arc.open(), 'data'):
3577+
self.expect_exception(
3578+
tarfile.AbsoluteLinkError,
3579+
"'parent' is a link to an absolute path")
34993580

35003581
@symlink_test
35013582
def test_sly_relative0(self):
35023583
# Inspired by 'relative0' in jwilk/traversal-archives
35033584
with ArchiveMaker() as arc:
3585+
# points to `../../tmp/moo`
35043586
arc.add('../moo', symlink_to='..//tmp/moo')
35053587

35063588
try:
@@ -3551,6 +3633,56 @@ def test_sly_relative2(self):
35513633
+ """['"].*moo['"], which is outside the """
35523634
+ "destination")
35533635

3636+
@symlink_test
3637+
def test_deep_symlink(self):
3638+
# Test that symlinks and hardlinks inside a directory
3639+
# point to the correct file (`target` of size 3).
3640+
# If links aren't supported we get a copy of the file.
3641+
with ArchiveMaker() as arc:
3642+
arc.add('targetdir/target', size=3)
3643+
# a hardlink's linkname is relative to the archive
3644+
arc.add('linkdir/hardlink', hardlink_to=os.path.join(
3645+
'targetdir', 'target'))
3646+
# a symlink's linkname is relative to the link's directory
3647+
arc.add('linkdir/symlink', symlink_to=os.path.join(
3648+
'..', 'targetdir', 'target'))
3649+
3650+
for filter in 'tar', 'data', 'fully_trusted':
3651+
with self.check_context(arc.open(), filter):
3652+
self.expect_file('targetdir/target', size=3)
3653+
self.expect_file('linkdir/hardlink', size=3)
3654+
if os_helper.can_symlink():
3655+
self.expect_file('linkdir/symlink', size=3,
3656+
symlink_to='../targetdir/target')
3657+
else:
3658+
self.expect_file('linkdir/symlink', size=3)
3659+
3660+
@symlink_test
3661+
def test_chains(self):
3662+
# Test chaining of symlinks/hardlinks.
3663+
# Symlinks are created before the files they point to.
3664+
with ArchiveMaker() as arc:
3665+
arc.add('linkdir/symlink', symlink_to='hardlink')
3666+
arc.add('symlink2', symlink_to=os.path.join(
3667+
'linkdir', 'hardlink2'))
3668+
arc.add('targetdir/target', size=3)
3669+
arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
3670+
arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
3671+
3672+
for filter in 'tar', 'data', 'fully_trusted':
3673+
with self.check_context(arc.open(), filter):
3674+
self.expect_file('targetdir/target', size=3)
3675+
self.expect_file('linkdir/hardlink', size=3)
3676+
self.expect_file('linkdir/hardlink2', size=3)
3677+
if os_helper.can_symlink():
3678+
self.expect_file('linkdir/symlink', size=3,
3679+
symlink_to='hardlink')
3680+
self.expect_file('symlink2', size=3,
3681+
symlink_to='linkdir/hardlink2')
3682+
else:
3683+
self.expect_file('linkdir/symlink', size=3)
3684+
self.expect_file('symlink2', size=3)
3685+
35543686
def test_modes(self):
35553687
# Test how file modes are extracted
35563688
# (Note that the modes are ignored on platforms without working chmod)
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`tarfile.data_filter` now takes the location of symlinks into account
2+
when determining their target, so it will no longer reject some valid
3+
tarballs with ``LinkOutsideDestinationError``.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.