From 11efec105578f03e96bf83f3a3a6d3605b747623 Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Wed, 7 Aug 2019 00:20:33 -0400 Subject: [PATCH 01/23] fix Path._add_implied_dirs to include all implied directories --- Lib/zipfile.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 3c1f1235034a9e2..6e0cc3af64c071a 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -8,6 +8,7 @@ import importlib.util import io import os +import pathlib import posixpath import shutil import stat @@ -2228,11 +2229,18 @@ def joinpath(self, add): @staticmethod def _add_implied_dirs(names): - return names + [ + subdirs = list(set([ name + "/" for name in map(posixpath.dirname, names) if name and name + "/" not in names - ] + ])) + missing_dirs = set() + for sd in list(subdirs): + for p in pathlib.PurePath(sd).parents: + if str(p) not in [".", "/"] and str(p) + "/" not in subdirs: + missing_dirs.add(str(p) + "/") + subdirs.extend(list(missing_dirs)) + return names + subdirs @property def parent(self): From 12491c584f29dbf6678e5f4041233a82ecdb6999 Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Wed, 7 Aug 2019 00:20:33 -0400 Subject: [PATCH 02/23] fix Path._add_implied_dirs to include all implied directories --- Lib/zipfile.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 3c1f1235034a9e2..6e0cc3af64c071a 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -8,6 +8,7 @@ import importlib.util import io import os +import pathlib import posixpath import shutil import stat @@ -2228,11 +2229,18 @@ def joinpath(self, add): @staticmethod def _add_implied_dirs(names): - return names + [ + subdirs = list(set([ name + "/" for name in map(posixpath.dirname, names) if name and name + "/" not in names - ] + ])) + missing_dirs = set() + for sd in list(subdirs): + for p in pathlib.PurePath(sd).parents: + if str(p) not in [".", "/"] and str(p) + "/" not in subdirs: + missing_dirs.add(str(p) + "/") + subdirs.extend(list(missing_dirs)) + return names + subdirs @property def parent(self): From b7266c13f8586dd686d11a8e5f51922e448f5ab5 Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Wed, 7 Aug 2019 18:32:42 -0400 Subject: [PATCH 03/23] Optimize code by using sets instead of lists --- Lib/zipfile.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 6e0cc3af64c071a..5fe87722c655b45 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2229,18 +2229,18 @@ def joinpath(self, add): @staticmethod def _add_implied_dirs(names): - subdirs = list(set([ + subdirs = set([ name + "/" for name in map(posixpath.dirname, names) if name and name + "/" not in names - ])) + ]) missing_dirs = set() for sd in list(subdirs): for p in pathlib.PurePath(sd).parents: - if str(p) not in [".", "/"] and str(p) + "/" not in subdirs: + if str(p) not in {".", "/"} and str(p) + "/" not in subdirs: missing_dirs.add(str(p) + "/") - subdirs.extend(list(missing_dirs)) - return names + subdirs + subdirs.update(missing_dirs) + return names + list(subdirs) @property def parent(self): From 10f50b156948335946df85d9cdec306b24c9b08b Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2019 23:48:10 +0000 Subject: [PATCH 04/23] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst diff --git a/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst new file mode 100644 index 000000000000000..f78d0232c39a938 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst @@ -0,0 +1 @@ +The zipfile.Path.iterdir() method calls a static method _add_implied_dirs. This was either returning duplicate directory names or missing directories. The fix was to use a set to collect unique directories using posixpath.dirname and then taking a second pass on collected set and use pathlib.PurePath on each directory and collect the parents and add that to our set and finally return all as a list. \ No newline at end of file From ca05ad6988bbd548f86f90ee2fb55acb2d7b544d Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Wed, 7 Aug 2019 00:20:33 -0400 Subject: [PATCH 05/23] fix Path._add_implied_dirs to include all implied directories --- Lib/zipfile.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 3c1f1235034a9e2..6e0cc3af64c071a 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -8,6 +8,7 @@ import importlib.util import io import os +import pathlib import posixpath import shutil import stat @@ -2228,11 +2229,18 @@ def joinpath(self, add): @staticmethod def _add_implied_dirs(names): - return names + [ + subdirs = list(set([ name + "/" for name in map(posixpath.dirname, names) if name and name + "/" not in names - ] + ])) + missing_dirs = set() + for sd in list(subdirs): + for p in pathlib.PurePath(sd).parents: + if str(p) not in [".", "/"] and str(p) + "/" not in subdirs: + missing_dirs.add(str(p) + "/") + subdirs.extend(list(missing_dirs)) + return names + subdirs @property def parent(self): From 342a447ad60546b47e752e714de5f928c6f4cdd6 Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Wed, 7 Aug 2019 18:32:42 -0400 Subject: [PATCH 06/23] Optimize code by using sets instead of lists --- Lib/zipfile.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 6e0cc3af64c071a..5fe87722c655b45 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2229,18 +2229,18 @@ def joinpath(self, add): @staticmethod def _add_implied_dirs(names): - subdirs = list(set([ + subdirs = set([ name + "/" for name in map(posixpath.dirname, names) if name and name + "/" not in names - ])) + ]) missing_dirs = set() for sd in list(subdirs): for p in pathlib.PurePath(sd).parents: - if str(p) not in [".", "/"] and str(p) + "/" not in subdirs: + if str(p) not in {".", "/"} and str(p) + "/" not in subdirs: missing_dirs.add(str(p) + "/") - subdirs.extend(list(missing_dirs)) - return names + subdirs + subdirs.update(missing_dirs) + return names + list(subdirs) @property def parent(self): From c36975f8742da817e3f2590ec6ed7463e5d63b57 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2019 23:48:10 +0000 Subject: [PATCH 07/23] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst diff --git a/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst new file mode 100644 index 000000000000000..f78d0232c39a938 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst @@ -0,0 +1 @@ +The zipfile.Path.iterdir() method calls a static method _add_implied_dirs. This was either returning duplicate directory names or missing directories. The fix was to use a set to collect unique directories using posixpath.dirname and then taking a second pass on collected set and use pathlib.PurePath on each directory and collect the parents and add that to our set and finally return all as a list. \ No newline at end of file From 69109a74779bc595422575ef939fe746144d405a Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Fri, 9 Aug 2019 14:55:31 -0400 Subject: [PATCH 08/23] Add tests to zipfile.Path.iterdir() fix --- Lib/test/test_zipfile.py | 91 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 0c8ffcdbf14afe2..e8fcfb71a348c53 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2403,10 +2403,19 @@ def add_dirs(zipfile): any directories implied by the presence of children. """ names = zipfile.namelist() + subdirs = set([ + name for name in map(posixpath.dirname, names) + if name and name + "/" not in names + ]) + missingdirs = set([ + str(p) for sd in subdirs + for p in pathlib.PurePath(sd).parents + if str(p) not in {".", "/"} + ]) + subdirs.update(missingdirs) consume( zipfile.writestr(name + "/", b"") - for name in map(posixpath.dirname, names) - if name and name + "/" not in names + for name in subdirs ) return zipfile @@ -2431,6 +2440,46 @@ def build_abcde_files(): return zf +def build_abcdef_files(): + """ + Create a zip file with this structure: + + . + ├── a.txt + └── b + ├── c.txt + └── d + │ └── e.txt + └── f.txt + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("a.txt", b"content of a") + zf.writestr("b/c.txt", b"content of c") + zf.writestr("b/d/e.txt", b"content of e") + zf.writestr("b/f.txt", "content of f") + zf.filename = "abcdef.zip" + return zf + + +def build_abde_files(): + """ + Create a zip file with this structure: + + . + ├── a.txt + └── b + └── d + └── e.txt + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("a.txt", b"content of a") + zf.writestr("b/d/e.txt", b"content of e") + zf.filename = "abcdef.zip" + return zf + + class TestPath(unittest.TestCase): def setUp(self): self.fixtures = contextlib.ExitStack() @@ -2442,6 +2491,18 @@ def zipfile_abcde(self): with self.subTest(): yield add_dirs(build_abcde_files()) + def zipfile_abcdef(self): + with self.subTest(): + yield build_abcdef_files() + with self.subTest(): + yield add_dirs(build_abcdef_files()) + + def zipfile_abde(self): + with self.subTest(): + yield build_abde_files() + with self.subTest(): + yield add_dirs(build_abde_files()) + def zipfile_ondisk(self): tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir())) for zipfile_abcde in self.zipfile_abcde(): @@ -2464,6 +2525,32 @@ def test_iterdir_istype(self): e, = d.iterdir() assert e.is_file() + def test_iterdir_abcdef_istype(self): + for zipfile_abcdef in self.zipfile_abcdef(): + root = zipfile.Path(zipfile_abcdef) + assert root.is_dir() + a, b = root.iterdir() + assert a.is_file() + assert b.is_dir() + c, f, d = b.iterdir() + assert c.is_file() + assert f.is_file() + assert d.is_dir() + e, = d.iterdir() + assert e.is_file() + + def test_iterdir_abde_istype(self): + for zipfile_abde in self.zipfile_abde(): + root = zipfile.Path(zipfile_abde) + assert root.is_dir() + a, b = root.iterdir() + assert a.is_file() + assert b.is_dir() + d, = b.iterdir() + assert d.is_dir() + e, = d.iterdir() + assert e.is_file() + def test_open(self): for zipfile_abcde in self.zipfile_abcde(): root = zipfile.Path(zipfile_abcde) From 42c9e51fa8a16d5e72d0e4bf2a16a4819eaa8c17 Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Fri, 9 Aug 2019 15:24:27 -0400 Subject: [PATCH 09/23] Update test for zipfile.Path.iterdir() --- Lib/test/test_zipfile.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index e8fcfb71a348c53..b857f0b94ad9c2c 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2529,26 +2529,38 @@ def test_iterdir_abcdef_istype(self): for zipfile_abcdef in self.zipfile_abcdef(): root = zipfile.Path(zipfile_abcdef) assert root.is_dir() - a, b = root.iterdir() + temp_list = list(root.iterdir()) + assert len(temp_list) == 2 + a, b = temp_list assert a.is_file() assert b.is_dir() - c, f, d = b.iterdir() + temp_list = list(b.iterdir()) + assert len(temp_list) == 3 + c, f, d = temp_list assert c.is_file() assert f.is_file() assert d.is_dir() - e, = d.iterdir() + temp_list = list(d.iterdir()) + assert len(temp_list) == 1 + e, = temp_list assert e.is_file() def test_iterdir_abde_istype(self): for zipfile_abde in self.zipfile_abde(): root = zipfile.Path(zipfile_abde) assert root.is_dir() - a, b = root.iterdir() + temp_list = list(root.iterdir()) + assert len(temp_list) == 2 + a, b = temp_list assert a.is_file() assert b.is_dir() - d, = b.iterdir() + temp_list = list(b.iterdir()) + assert len(temp_list) == 1 + d, = temp_list assert d.is_dir() - e, = d.iterdir() + temp_list = list(d.iterdir()) + assert len(temp_list) == 1 + e, = temp_list assert e.is_file() def test_open(self): From 2c55595337665d1ca8b8c5112f9c318d7460b740 Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Fri, 9 Aug 2019 15:47:36 -0400 Subject: [PATCH 10/23] remove whitespace from test file --- Lib/test/test_zipfile.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index b857f0b94ad9c2c..e5534b3344494a0 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2404,11 +2404,11 @@ def add_dirs(zipfile): """ names = zipfile.namelist() subdirs = set([ - name for name in map(posixpath.dirname, names) + name for name in map(posixpath.dirname, names) if name and name + "/" not in names ]) missingdirs = set([ - str(p) for sd in subdirs + str(p) for sd in subdirs for p in pathlib.PurePath(sd).parents if str(p) not in {".", "/"} ]) @@ -2461,7 +2461,7 @@ def build_abcdef_files(): zf.filename = "abcdef.zip" return zf - + def build_abde_files(): """ Create a zip file with this structure: @@ -2477,8 +2477,8 @@ def build_abde_files(): zf.writestr("a.txt", b"content of a") zf.writestr("b/d/e.txt", b"content of e") zf.filename = "abcdef.zip" - return zf - + return zf + class TestPath(unittest.TestCase): def setUp(self): @@ -2496,12 +2496,12 @@ def zipfile_abcdef(self): yield build_abcdef_files() with self.subTest(): yield add_dirs(build_abcdef_files()) - + def zipfile_abde(self): with self.subTest(): yield build_abde_files() with self.subTest(): - yield add_dirs(build_abde_files()) + yield add_dirs(build_abde_files()) def zipfile_ondisk(self): tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir())) @@ -2543,7 +2543,7 @@ def test_iterdir_abcdef_istype(self): temp_list = list(d.iterdir()) assert len(temp_list) == 1 e, = temp_list - assert e.is_file() + assert e.is_file() def test_iterdir_abde_istype(self): for zipfile_abde in self.zipfile_abde(): @@ -2561,8 +2561,8 @@ def test_iterdir_abde_istype(self): temp_list = list(d.iterdir()) assert len(temp_list) == 1 e, = temp_list - assert e.is_file() - + assert e.is_file() + def test_open(self): for zipfile_abcde in self.zipfile_abcde(): root = zipfile.Path(zipfile_abcde) From 64c50a3df9d7024144df3ef189ac18ed22e4627c Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 10 Aug 2019 09:57:38 -0400 Subject: [PATCH 11/23] Rewrite NEWS blurb to describe the user-facing impact and avoid implementation details. --- .../next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst index f78d0232c39a938..f9ec6a33b07bad9 100644 --- a/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst +++ b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst @@ -1 +1 @@ -The zipfile.Path.iterdir() method calls a static method _add_implied_dirs. This was either returning duplicate directory names or missing directories. The fix was to use a set to collect unique directories using posixpath.dirname and then taking a second pass on collected set and use pathlib.PurePath on each directory and collect the parents and add that to our set and finally return all as a list. \ No newline at end of file +In ``zipfile.Path``, when adding implicit dirs, ensure that ancestral directories are added and that duplicates are excluded. From 9e9e42f4d0431fa5ab043830ae77e863afee78ee Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Sat, 10 Aug 2019 15:12:32 -0400 Subject: [PATCH 12/23] remove redundant [] within set comprehension --- Lib/test/test_zipfile.py | 14 ++++++++------ Lib/zipfile.py | 15 ++++++++------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index e5534b3344494a0..709a7b516014638 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2403,15 +2403,17 @@ def add_dirs(zipfile): any directories implied by the presence of children. """ names = zipfile.namelist() - subdirs = set([ - name for name in map(posixpath.dirname, names) + subdirs = set( + name + for name in map(posixpath.dirname, names) if name and name + "/" not in names - ]) - missingdirs = set([ - str(p) for sd in subdirs + ) + missingdirs = set( + str(p) + for sd in subdirs for p in pathlib.PurePath(sd).parents if str(p) not in {".", "/"} - ]) + ) subdirs.update(missingdirs) consume( zipfile.writestr(name + "/", b"") diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 5fe87722c655b45..213c49b4e766f85 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2229,16 +2229,17 @@ def joinpath(self, add): @staticmethod def _add_implied_dirs(names): - subdirs = set([ + subdirs = set( name + "/" for name in map(posixpath.dirname, names) if name and name + "/" not in names - ]) - missing_dirs = set() - for sd in list(subdirs): - for p in pathlib.PurePath(sd).parents: - if str(p) not in {".", "/"} and str(p) + "/" not in subdirs: - missing_dirs.add(str(p) + "/") + ) + missing_dirs = set( + str(p) + "/" + for sd in subdirs + for p in pathlib.PurePath(sd).parents + if str(p) not in {".", "/"} and str(p) + "/" not in subdirs + ) subdirs.update(missing_dirs) return names + list(subdirs) From eeb5f98396035b183cbecc1c2b20ceb52f873a87 Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Sat, 10 Aug 2019 16:52:45 -0400 Subject: [PATCH 13/23] Update to use unique_everseen to maintain order and other suggestions in review --- Lib/test/test_zipfile.py | 4 ++-- Lib/zipfile.py | 30 ++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 709a7b516014638..1bdb0486bed59eb 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2490,8 +2490,8 @@ def setUp(self): def zipfile_abcde(self): with self.subTest(): yield build_abcde_files() - with self.subTest(): - yield add_dirs(build_abcde_files()) + # with self.subTest(): + # yield add_dirs(build_abcde_files()) def zipfile_abcdef(self): with self.subTest(): diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 213c49b4e766f85..df7808d116b8c00 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -7,6 +7,7 @@ import functools import importlib.util import io +import itertools import os import pathlib import posixpath @@ -2227,21 +2228,38 @@ def joinpath(self, add): __truediv__ = joinpath + @staticmethod + def unique_everseen(iterable, key=None): + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen = set() + seen_add = seen.add + if key is None: + for element in itertools.filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element + @staticmethod def _add_implied_dirs(names): - subdirs = set( + subdirs = list(Path.unique_everseen([ name + "/" for name in map(posixpath.dirname, names) if name and name + "/" not in names - ) - missing_dirs = set( + ])) + missing_dirs = [ str(p) + "/" for sd in subdirs for p in pathlib.PurePath(sd).parents if str(p) not in {".", "/"} and str(p) + "/" not in subdirs - ) - subdirs.update(missing_dirs) - return names + list(subdirs) + ] + return names + subdirs + missing_dirs @property def parent(self): From 7b1b6f53cf55c02df47af1961c2469f1e928abd1 Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Sat, 10 Aug 2019 17:24:46 -0400 Subject: [PATCH 14/23] remove whitespace and add back add_dirs in tests --- Lib/test/test_zipfile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 1bdb0486bed59eb..952b3daca68dbb4 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2404,12 +2404,12 @@ def add_dirs(zipfile): """ names = zipfile.namelist() subdirs = set( - name + name for name in map(posixpath.dirname, names) if name and name + "/" not in names ) missingdirs = set( - str(p) + str(p) for sd in subdirs for p in pathlib.PurePath(sd).parents if str(p) not in {".", "/"} @@ -2490,8 +2490,8 @@ def setUp(self): def zipfile_abcde(self): with self.subTest(): yield build_abcde_files() - # with self.subTest(): - # yield add_dirs(build_abcde_files()) + with self.subTest(): + yield add_dirs(build_abcde_files()) def zipfile_abcdef(self): with self.subTest(): From ad05c797655b029a1e11c49f0e7b45a93880f8dd Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Mon, 12 Aug 2019 10:00:31 -0400 Subject: [PATCH 15/23] Add new standalone function parents using posixpath to get parents of a directory --- Lib/zipfile.py | 66 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index df7808d116b8c00..48934fde82267b7 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2106,6 +2106,44 @@ def _compile(file, optimize=-1): return (fname, archivename) +def _unique_everseen(iterable, key=None): + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen = set() + seen_add = seen.add + if key is None: + for element in itertools.filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element + + +def _parents(name): + """ + for given directory, return a list of parents + example: 'b/' returns ['.'] + 'b/d/' return ['b', '.'] + """ + subdir_list = [name] + subdir_parents = [] + while True: + head, tail = posixpath.split(name) + if head: + if head + "/" not in subdir_list: + subdir_parents.append(head) + name = head + else: + subdir_parents.append(".") + break + return subdir_parents + + class Path: """ A pathlib-compatible interface for zip files. @@ -2226,38 +2264,20 @@ def joinpath(self, add): names = self._names() return self._next(next_dir if next not in names and next_dir in names else next) - __truediv__ = joinpath - - @staticmethod - def unique_everseen(iterable, key=None): - "List unique elements, preserving order. Remember all elements ever seen." - # unique_everseen('AAAABBBCCDAABBB') --> A B C D - # unique_everseen('ABBCcAD', str.lower) --> A B C D - seen = set() - seen_add = seen.add - if key is None: - for element in itertools.filterfalse(seen.__contains__, iterable): - seen_add(element) - yield element - else: - for element in iterable: - k = key(element) - if k not in seen: - seen_add(k) - yield element + __truediv__ = joinpath @staticmethod def _add_implied_dirs(names): - subdirs = list(Path.unique_everseen([ + subdirs = list(_unique_everseen([ name + "/" for name in map(posixpath.dirname, names) if name and name + "/" not in names ])) missing_dirs = [ - str(p) + "/" + p + "/" for sd in subdirs - for p in pathlib.PurePath(sd).parents - if str(p) not in {".", "/"} and str(p) + "/" not in subdirs + for p in _parents(sd) + if p not in {".", "/"} and p + "/" not in subdirs ] return names + subdirs + missing_dirs From 9476650b7fe5a53808ccd7df060cebb7d154552f Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Mon, 12 Aug 2019 10:03:05 -0400 Subject: [PATCH 16/23] removing whitespace (sorry) --- Lib/zipfile.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 48934fde82267b7..1c5624431731178 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2125,8 +2125,8 @@ def _unique_everseen(iterable, key=None): def _parents(name): - """ - for given directory, return a list of parents + """ + for given directory, return a list of parents example: 'b/' returns ['.'] 'b/d/' return ['b', '.'] """ @@ -2141,9 +2141,9 @@ def _parents(name): else: subdir_parents.append(".") break - return subdir_parents - - + return subdir_parents + + class Path: """ A pathlib-compatible interface for zip files. @@ -2264,7 +2264,7 @@ def joinpath(self, add): names = self._names() return self._next(next_dir if next not in names and next_dir in names else next) - __truediv__ = joinpath + __truediv__ = joinpath @staticmethod def _add_implied_dirs(names): From f9c1706454c00f3e412ea4bf421a5cd5b8ac4f19 Mon Sep 17 00:00:00 2001 From: Srinivas Nyayapati Date: Mon, 12 Aug 2019 10:05:35 -0400 Subject: [PATCH 17/23] Remove import pathlib from zipfile.py --- Lib/zipfile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 1c5624431731178..11db7dce216621e 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -9,7 +9,6 @@ import io import itertools import os -import pathlib import posixpath import shutil import stat From c153a6f1438fbe165737d9131d64bc8a76c71ecb Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 24 Aug 2019 09:22:15 -0400 Subject: [PATCH 18/23] Rewrite _parents as a slice on a generator of the ancestry of a path. --- Lib/zipfile.py | 53 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 11db7dce216621e..fd16a2fbb8ab449 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2123,24 +2123,45 @@ def _unique_everseen(iterable, key=None): yield element -def _parents(name): +def _parents(path): """ - for given directory, return a list of parents - example: 'b/' returns ['.'] - 'b/d/' return ['b', '.'] + Given a path with elements separated by + posixpath.sep, generate all parents of that path. + + >>> list(_parents('b/d')) + ['b'] + >>> list(_parents('/b/d/')) + ['/b'] + >>> list(_parents('b/d/f/')) + ['b/d', 'b'] + >>> list(_parents('b')) + [] + >>> list(_parents('')) + [] """ - subdir_list = [name] - subdir_parents = [] - while True: - head, tail = posixpath.split(name) - if head: - if head + "/" not in subdir_list: - subdir_parents.append(head) - name = head - else: - subdir_parents.append(".") - break - return subdir_parents + return itertools.islice(_ancestry(path), 1, None) + + +def _ancestry(path): + """ + Given a path with elements separated by + posixpath.sep, generate all elements of that path + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] + >>> list(_ancestry('/b/d/')) + ['/b/d', '/b'] + >>> list(_ancestry('b/d/f/')) + ['b/d/f', 'b/d', 'b'] + >>> list(_ancestry('b')) + ['b'] + >>> list(_ancestry('')) + [] + """ + path = path.rstrip(posixpath.sep) + while path and path != posixpath.sep: + yield path + path, tail = posixpath.split(path) class Path: From 0002c296a64c0b9064ab51659b5bff2cc6ea6d0e Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 24 Aug 2019 09:28:00 -0400 Subject: [PATCH 19/23] Remove check for '.' and '/', now that parents no longer returns those. --- Lib/zipfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index fd16a2fbb8ab449..6074e185ab67923 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2297,7 +2297,7 @@ def _add_implied_dirs(names): p + "/" for sd in subdirs for p in _parents(sd) - if p not in {".", "/"} and p + "/" not in subdirs + if p + "/" not in subdirs ] return names + subdirs + missing_dirs From c1cbec623aea9b416e59ea92a8407c827d4f7f82 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 24 Aug 2019 09:29:27 -0400 Subject: [PATCH 20/23] Separate calculation of implied dirs from adding those --- Lib/zipfile.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 6074e185ab67923..e402ccc3ad45734 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2287,7 +2287,7 @@ def joinpath(self, add): __truediv__ = joinpath @staticmethod - def _add_implied_dirs(names): + def _implied_dirs(names): subdirs = list(_unique_everseen([ name + "/" for name in map(posixpath.dirname, names) @@ -2299,7 +2299,11 @@ def _add_implied_dirs(names): for p in _parents(sd) if p + "/" not in subdirs ] - return names + subdirs + missing_dirs + return subdirs + missing_dirs + + @classmethod + def _add_implied_dirs(cls, names): + return names + cls._implied_dirs(names) @property def parent(self): From 78a584efdf3b97e2c5376e6a61ed1a8031d1c074 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 24 Aug 2019 09:38:35 -0400 Subject: [PATCH 21/23] Re-use _implied_dirs in tests for generating zipfile with dir entries. --- Lib/test/test_zipfile.py | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 952b3daca68dbb4..23d8ff5186406cb 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2397,29 +2397,14 @@ def test_extract_command(self): consume = tuple -def add_dirs(zipfile): +def add_dirs(zf): """ - Given a writable zipfile, inject directory entries for + Given a writable zip file zf, inject directory entries for any directories implied by the presence of children. """ - names = zipfile.namelist() - subdirs = set( - name - for name in map(posixpath.dirname, names) - if name and name + "/" not in names - ) - missingdirs = set( - str(p) - for sd in subdirs - for p in pathlib.PurePath(sd).parents - if str(p) not in {".", "/"} - ) - subdirs.update(missingdirs) - consume( - zipfile.writestr(name + "/", b"") - for name in subdirs - ) - return zipfile + for name in zipfile.Path._implied_dirs(zf.namelist()): + zf.writestr(name, b"") + return zf def build_abcde_files(): From 2384aa71220f8e8ed888d30b3d94d2ce2941ff91 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 24 Aug 2019 10:13:38 -0400 Subject: [PATCH 22/23] Replace three fixtures (abcde, abcdef, abde) with one representative example alpharep. --- Lib/test/test_zipfile.py | 185 ++++++++++++--------------------------- 1 file changed, 58 insertions(+), 127 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 23d8ff5186406cb..8e437e5cb2b90c8 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2407,63 +2407,39 @@ def add_dirs(zf): return zf -def build_abcde_files(): +def build_alpharep_fixture(): """ Create a zip file with this structure: . ├── a.txt - └── b - ├── c.txt - └── d - └── e.txt + ├── b + │ ├── c.txt + │ ├── d + │ │ └── e.txt + │ └── f.txt + └── g + └── h + └── i.txt + + This fixture has the following key characteristics: + + - a file at the root (a) + - a file two levels deep (b/d/e) + - multiple files in a directory (b/c, b/f) + - a directory containing only a directory (g/h) + + "alpha" because it uses alphabet + "rep" because it's a representative example """ data = io.BytesIO() zf = zipfile.ZipFile(data, "w") zf.writestr("a.txt", b"content of a") zf.writestr("b/c.txt", b"content of c") zf.writestr("b/d/e.txt", b"content of e") - zf.filename = "abcde.zip" - return zf - - -def build_abcdef_files(): - """ - Create a zip file with this structure: - - . - ├── a.txt - └── b - ├── c.txt - └── d - │ └── e.txt - └── f.txt - """ - data = io.BytesIO() - zf = zipfile.ZipFile(data, "w") - zf.writestr("a.txt", b"content of a") - zf.writestr("b/c.txt", b"content of c") - zf.writestr("b/d/e.txt", b"content of e") - zf.writestr("b/f.txt", "content of f") - zf.filename = "abcdef.zip" - return zf - - -def build_abde_files(): - """ - Create a zip file with this structure: - - . - ├── a.txt - └── b - └── d - └── e.txt - """ - data = io.BytesIO() - zf = zipfile.ZipFile(data, "w") - zf.writestr("a.txt", b"content of a") - zf.writestr("b/d/e.txt", b"content of e") - zf.filename = "abcdef.zip" + zf.writestr("b/f.txt", b"content of f") + zf.writestr("g/h/i.txt", b"content of i") + zf.filename = "alpharep.zip" return zf @@ -2472,110 +2448,64 @@ def setUp(self): self.fixtures = contextlib.ExitStack() self.addCleanup(self.fixtures.close) - def zipfile_abcde(self): - with self.subTest(): - yield build_abcde_files() - with self.subTest(): - yield add_dirs(build_abcde_files()) - - def zipfile_abcdef(self): - with self.subTest(): - yield build_abcdef_files() - with self.subTest(): - yield add_dirs(build_abcdef_files()) - - def zipfile_abde(self): + def zipfile_alpharep(self): with self.subTest(): - yield build_abde_files() + yield build_alpharep_fixture() with self.subTest(): - yield add_dirs(build_abde_files()) + yield add_dirs(build_alpharep_fixture()) def zipfile_ondisk(self): tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir())) - for zipfile_abcde in self.zipfile_abcde(): - buffer = zipfile_abcde.fp - zipfile_abcde.close() - path = tmpdir / zipfile_abcde.filename + for alpharep in self.zipfile_alpharep(): + buffer = alpharep.fp + alpharep.close() + path = tmpdir / alpharep.filename with path.open("wb") as strm: strm.write(buffer.getvalue()) yield path - def test_iterdir_istype(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + def test_iterdir_and_types(self): + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) assert root.is_dir() - a, b = root.iterdir() + a, b, g = root.iterdir() assert a.is_file() assert b.is_dir() - c, d = b.iterdir() - assert c.is_file() + assert g.is_dir() + c, f, d = b.iterdir() + assert c.is_file() and f.is_file() e, = d.iterdir() assert e.is_file() - - def test_iterdir_abcdef_istype(self): - for zipfile_abcdef in self.zipfile_abcdef(): - root = zipfile.Path(zipfile_abcdef) - assert root.is_dir() - temp_list = list(root.iterdir()) - assert len(temp_list) == 2 - a, b = temp_list - assert a.is_file() - assert b.is_dir() - temp_list = list(b.iterdir()) - assert len(temp_list) == 3 - c, f, d = temp_list - assert c.is_file() - assert f.is_file() - assert d.is_dir() - temp_list = list(d.iterdir()) - assert len(temp_list) == 1 - e, = temp_list - assert e.is_file() - - def test_iterdir_abde_istype(self): - for zipfile_abde in self.zipfile_abde(): - root = zipfile.Path(zipfile_abde) - assert root.is_dir() - temp_list = list(root.iterdir()) - assert len(temp_list) == 2 - a, b = temp_list - assert a.is_file() - assert b.is_dir() - temp_list = list(b.iterdir()) - assert len(temp_list) == 1 - d, = temp_list - assert d.is_dir() - temp_list = list(d.iterdir()) - assert len(temp_list) == 1 - e, = temp_list - assert e.is_file() + h, = g.iterdir() + i, = h.iterdir() + assert i.is_file() def test_open(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) - a, b = root.iterdir() + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() with a.open() as strm: data = strm.read() assert data == b"content of a" def test_read(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) - a, b = root.iterdir() + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() assert a.read_text() == "content of a" assert a.read_bytes() == b"content of a" def test_joinpath(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) a = root.joinpath("a") assert a.is_file() e = root.joinpath("b").joinpath("d").joinpath("e.txt") assert e.read_text() == "content of e" def test_traverse_truediv(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) a = root / "a" assert a.is_file() e = root / "b" / "d" / "e.txt" @@ -2590,26 +2520,27 @@ def test_pathlike_construction(self): zipfile.Path(pathlike) def test_traverse_pathlike(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) root / pathlib.Path("a") def test_parent(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) assert (root / 'a').parent.at == '' assert (root / 'a' / 'b').parent.at == 'a/' def test_dir_parent(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) assert (root / 'b').parent.at == '' assert (root / 'b/').parent.at == '' def test_missing_dir_parent(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) assert (root / 'missing dir/').parent.at == '' + if __name__ == "__main__": unittest.main() From be2a13ad638d6865e09f19272b60f15794d68662 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 24 Aug 2019 10:52:39 -0400 Subject: [PATCH 23/23] Simplify implementation of _implied_dirs by collapsing the generation of parent directories for each name. --- Lib/zipfile.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index e402ccc3ad45734..dfd090795019622 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2288,22 +2288,16 @@ def joinpath(self, add): @staticmethod def _implied_dirs(names): - subdirs = list(_unique_everseen([ - name + "/" - for name in map(posixpath.dirname, names) - if name and name + "/" not in names - ])) - missing_dirs = [ - p + "/" - for sd in subdirs - for p in _parents(sd) - if p + "/" not in subdirs - ] - return subdirs + missing_dirs + return _unique_everseen( + parent + "/" + for name in names + for parent in _parents(name) + if parent + "/" not in names + ) @classmethod def _add_implied_dirs(cls, names): - return names + cls._implied_dirs(names) + return names + list(cls._implied_dirs(names)) @property def parent(self):