Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 0aa1ee2

Browse filesBrowse files
authored
[3.10] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) (#123426)
Applies changes from zipp 3.20.1 and jaraco/zippGH-124 (cherry picked from commit 2231286) (cherry picked from commit 17b77bb) Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
1 parent d3f39ce commit 0aa1ee2
Copy full SHA for 0aa1ee2

File tree

3 files changed

+81
-67
lines changed
Filter options

3 files changed

+81
-67
lines changed

‎Lib/test/test_zipfile.py

Copy file name to clipboardExpand all lines: Lib/test/test_zipfile.py
+70-6Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import itertools
66
import os
77
import pathlib
8+
import platform
89
import posixpath
910
import string
1011
import struct
@@ -3282,7 +3283,11 @@ def test_extract_orig_with_implied_dirs(self, alpharep):
32823283

32833284
def test_malformed_paths(self):
32843285
"""
3285-
Path should handle malformed paths.
3286+
Path should handle malformed paths gracefully.
3287+
3288+
Paths with leading slashes are not visible.
3289+
3290+
Paths with dots are treated like regular files.
32863291
"""
32873292
data = io.BytesIO()
32883293
zf = zipfile.ZipFile(data, "w")
@@ -3291,11 +3296,70 @@ def test_malformed_paths(self):
32913296
zf.writestr("../parent.txt", b"content")
32923297
zf.filename = ''
32933298
root = zipfile.Path(zf)
3294-
assert list(map(str, root.iterdir())) == [
3295-
'one-slash.txt',
3296-
'two-slash.txt',
3297-
'parent.txt',
3298-
]
3299+
assert list(map(str, root.iterdir())) == ['../']
3300+
assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
3301+
3302+
@unittest.skipIf(platform.system() == "Windows", "GH-123693")
3303+
def test_unsupported_names(self):
3304+
"""
3305+
Path segments with special characters are readable.
3306+
3307+
On some platforms or file systems, characters like
3308+
``:`` and ``?`` are not allowed, but they are valid
3309+
in the zip file.
3310+
"""
3311+
data = io.BytesIO()
3312+
zf = zipfile.ZipFile(data, "w")
3313+
zf.writestr("path?", b"content")
3314+
zf.writestr("V: NMS.flac", b"fLaC...")
3315+
zf.filename = ''
3316+
root = zipfile.Path(zf)
3317+
contents = root.iterdir()
3318+
assert next(contents).name == 'path?'
3319+
item = next(contents)
3320+
assert item.name == 'V: NMS.flac', item.name
3321+
assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
3322+
3323+
@unittest.skipIf(platform.system() == "Windows", "GH-123693")
3324+
def test_backslash_not_separator(self):
3325+
"""
3326+
In a zip file, backslashes are not separators.
3327+
"""
3328+
data = io.BytesIO()
3329+
zf = zipfile.ZipFile(data, "w")
3330+
zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
3331+
zf.filename = ''
3332+
root = zipfile.Path(zf)
3333+
(first,) = root.iterdir()
3334+
assert not first.is_dir()
3335+
assert first.name == 'foo\\bar', first.name
3336+
3337+
3338+
class DirtyZipInfo(zipfile.ZipInfo):
3339+
"""
3340+
Bypass name sanitization.
3341+
"""
3342+
3343+
def __init__(self, filename, *args, **kwargs):
3344+
super().__init__(filename, *args, **kwargs)
3345+
self.filename = filename
3346+
3347+
@classmethod
3348+
def for_name(cls, name, archive):
3349+
"""
3350+
Construct the same way that ZipFile.writestr does.
3351+
3352+
TODO: extract this functionality and re-use
3353+
"""
3354+
self = cls(filename=name, date_time=time.localtime(time.time())[:6])
3355+
self.compress_type = archive.compression
3356+
self.compress_level = archive.compresslevel
3357+
if self.filename.endswith('/'): # pragma: no cover
3358+
self.external_attr = 0o40775 << 16 # drwxrwxr-x
3359+
self.external_attr |= 0x10 # MS-DOS directory flag
3360+
else:
3361+
self.external_attr = 0o600 << 16 # ?rw-------
3362+
return self
32993363

33003364

33013365
class StripExtraTests(unittest.TestCase):

‎Lib/zipfile.py

Copy file name to clipboardExpand all lines: Lib/zipfile.py
+8-61Lines changed: 8 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2152,7 +2152,7 @@ def _parents(path):
21522152
def _ancestry(path):
21532153
"""
21542154
Given a path with elements separated by
2155-
posixpath.sep, generate all elements of that path
2155+
posixpath.sep, generate all elements of that path.
21562156
21572157
>>> list(_ancestry('b/d'))
21582158
['b/d', 'b']
@@ -2164,9 +2164,14 @@ def _ancestry(path):
21642164
['b']
21652165
>>> list(_ancestry(''))
21662166
[]
2167+
2168+
Multiple separators are treated like a single.
2169+
2170+
>>> list(_ancestry('//b//d///f//'))
2171+
['//b//d///f', '//b//d', '//b']
21672172
"""
21682173
path = path.rstrip(posixpath.sep)
2169-
while path and path != posixpath.sep:
2174+
while path.rstrip(posixpath.sep):
21702175
yield path
21712176
path, tail = posixpath.split(path)
21722177

@@ -2183,65 +2188,7 @@ def _difference(minuend, subtrahend):
21832188
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
21842189

21852190

2186-
class SanitizedNames:
2187-
"""
2188-
ZipFile mix-in to ensure names are sanitized.
2189-
"""
2190-
2191-
def namelist(self):
2192-
return list(map(self._sanitize, super().namelist()))
2193-
2194-
@staticmethod
2195-
def _sanitize(name):
2196-
r"""
2197-
Ensure a relative path with posix separators and no dot names.
2198-
Modeled after
2199-
https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
2200-
but provides consistent cross-platform behavior.
2201-
>>> san = SanitizedNames._sanitize
2202-
>>> san('/foo/bar')
2203-
'foo/bar'
2204-
>>> san('//foo.txt')
2205-
'foo.txt'
2206-
>>> san('foo/.././bar.txt')
2207-
'foo/bar.txt'
2208-
>>> san('foo../.bar.txt')
2209-
'foo../.bar.txt'
2210-
>>> san('\\foo\\bar.txt')
2211-
'foo/bar.txt'
2212-
>>> san('D:\\foo.txt')
2213-
'D/foo.txt'
2214-
>>> san('\\\\server\\share\\file.txt')
2215-
'server/share/file.txt'
2216-
>>> san('\\\\?\\GLOBALROOT\\Volume3')
2217-
'?/GLOBALROOT/Volume3'
2218-
>>> san('\\\\.\\PhysicalDrive1\\root')
2219-
'PhysicalDrive1/root'
2220-
Retain any trailing slash.
2221-
>>> san('abc/')
2222-
'abc/'
2223-
Raises a ValueError if the result is empty.
2224-
>>> san('../..')
2225-
Traceback (most recent call last):
2226-
...
2227-
ValueError: Empty filename
2228-
"""
2229-
2230-
def allowed(part):
2231-
return part and part not in {'..', '.'}
2232-
2233-
# Remove the drive letter.
2234-
# Don't use ntpath.splitdrive, because that also strips UNC paths
2235-
bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
2236-
clean = bare.replace('\\', '/')
2237-
parts = clean.split('/')
2238-
joined = '/'.join(filter(allowed, parts))
2239-
if not joined:
2240-
raise ValueError("Empty filename")
2241-
return joined + '/' * name.endswith('/')
2242-
2243-
2244-
class CompleteDirs(SanitizedNames, ZipFile):
2191+
class CompleteDirs(ZipFile):
22452192
"""
22462193
A ZipFile subclass that ensures that implied directories
22472194
are always included in the namelist.
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
2+
causing infinite loops (gh-122905) without breaking contents using
3+
legitimate characters.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.