From 777af5f08b4d74fc175cb59660758479bf7ff06a Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 23 Mar 2025 16:36:40 +0000 Subject: [PATCH 1/3] pathlib ABCs: yield progress reports from `WritablePath._copy_from()` Make `WritablePath._copy_from()` yield `(target, source, part_size)` tuples when copying files and directories. A tuple with `part_size=0` is emitted for every path encountered, and further tuples with `part_size>0` **may** be emitted when copying regular files. This should allow `anyio.Path` to wrap `_copy_from()` and make it cancelable. --- Lib/pathlib/__init__.py | 18 +++++++++++------- Lib/pathlib/_os.py | 2 +- Lib/pathlib/types.py | 16 ++++++++++------ 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index a8111cc4f305fa..a6da915f7b0511 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -1109,7 +1109,7 @@ def copy(self, target, **kwargs): copy_to_target = target._copy_from except AttributeError: raise TypeError(f"Target path is not writable: {target!r}") from None - copy_to_target(self, **kwargs) + list(copy_to_target(self, **kwargs)) # Consume generator. return target.joinpath() # Empty join to ensure fresh metadata. def copy_into(self, target_dir, **kwargs): @@ -1127,26 +1127,30 @@ def copy_into(self, target_dir, **kwargs): def _copy_from(self, source, follow_symlinks=True, preserve_metadata=False): """ - Recursively copy the given path to this path. + Recursively copy the given path to this path. This a generator + function that yields (target, source, part_size) tuples as the copying + operation progresses. """ + yield self, source, 0 if not follow_symlinks and source.info.is_symlink(): self._copy_from_symlink(source, preserve_metadata) elif source.info.is_dir(): children = source.iterdir() os.mkdir(self) for child in children: - self.joinpath(child.name)._copy_from( + yield from self.joinpath(child.name)._copy_from( child, follow_symlinks, preserve_metadata) if preserve_metadata: copy_info(source.info, self) else: - self._copy_from_file(source, preserve_metadata) + for part_size in self._copy_from_file(source, preserve_metadata): + yield self, source, part_size def _copy_from_file(self, source, preserve_metadata=False): ensure_different_files(source, self) with magic_open(source, 'rb') as source_f: with open(self, 'wb') as target_f: - copyfileobj(source_f, target_f) + yield from copyfileobj(source_f, target_f) if preserve_metadata: copy_info(source.info, self) @@ -1160,8 +1164,8 @@ def _copy_from_file(self, source, preserve_metadata=False): pass else: copyfile2(source, str(self)) - return - self._copy_from_file_fallback(source, preserve_metadata) + return iter([]) + return self._copy_from_file_fallback(source, preserve_metadata) if os.name == 'nt': # If a directory-symlink is copied *before* its target, then diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index ee8657f427efbd..061bcf49ba2b99 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -163,7 +163,7 @@ def copyfileobj(source_f, target_f): read_source = source_f.read write_target = target_f.write while buf := read_source(1024 * 1024): - write_target(buf) + yield write_target(buf) def magic_open(path, mode='r', buffering=-1, encoding=None, errors=None, diff --git a/Lib/pathlib/types.py b/Lib/pathlib/types.py index 85dd9e5b2d6b9a..b77e79dfc51d53 100644 --- a/Lib/pathlib/types.py +++ b/Lib/pathlib/types.py @@ -343,7 +343,7 @@ def copy(self, target, **kwargs): copy_to_target = target._copy_from except AttributeError: raise TypeError(f"Target path is not writable: {target!r}") from None - copy_to_target(self, **kwargs) + list(copy_to_target(self, **kwargs)) # Consume generator. return target.joinpath() # Empty join to ensure fresh metadata. def copy_into(self, target_dir, **kwargs): @@ -413,23 +413,27 @@ def write_text(self, data, encoding=None, errors=None, newline=None): def _copy_from(self, source, follow_symlinks=True): """ - Recursively copy the given path to this path. + Recursively copy the given path to this path. This a generator + function that yields (target, source, part_size) tuples as the copying + operation progresses. """ - stack = [(source, self)] + stack = [(self, source)] while stack: - src, dst = stack.pop() + dst, src = stack.pop() + yield dst, src, 0 if not follow_symlinks and src.info.is_symlink(): dst.symlink_to(str(src.readlink()), src.info.is_dir()) elif src.info.is_dir(): children = src.iterdir() dst.mkdir() for child in children: - stack.append((child, dst.joinpath(child.name))) + stack.append((dst.joinpath(child.name), child)) else: ensure_different_files(src, dst) with magic_open(src, 'rb') as source_f: with magic_open(dst, 'wb') as target_f: - copyfileobj(source_f, target_f) + for part_size in copyfileobj(source_f, target_f): + yield dst, src, part_size _JoinablePath.register(PurePath) From f19ff132eaf4066395f7cc57213753f5146fa12d Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 23 Mar 2025 18:30:02 +0000 Subject: [PATCH 2/3] Report progress from `os.sendfile()` and `os.copy_file_range()` --- Lib/pathlib/_os.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index 061bcf49ba2b99..56eb9c59c239a7 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -80,6 +80,7 @@ def _copy_file_range(source_fd, target_fd): if sent == 0: break # EOF offset += sent + yield sent else: _copy_file_range = None @@ -97,6 +98,7 @@ def _sendfile(source_fd, target_fd): if sent == 0: break # EOF offset += sent + yield sent else: _sendfile = None @@ -141,14 +143,14 @@ def copyfileobj(source_f, target_f): raise err if _copy_file_range: try: - _copy_file_range(source_fd, target_fd) + yield from _copy_file_range(source_fd, target_fd) return except OSError as err: if err.errno not in (ETXTBSY, EXDEV): raise err if _sendfile: try: - _sendfile(source_fd, target_fd) + yield from _sendfile(source_fd, target_fd) return except OSError as err: if err.errno != ENOTSOCK: From 98d5c6d1aac10bbab8b444aed70e8d3c75e86f4a Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 23 Mar 2025 18:47:18 +0000 Subject: [PATCH 3/3] Tidying, naming. --- Lib/pathlib/__init__.py | 10 +++++----- Lib/pathlib/_os.py | 17 ++++++----------- Lib/pathlib/types.py | 6 +++--- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index a6da915f7b0511..581d7e36ef3248 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -1128,7 +1128,7 @@ def copy_into(self, target_dir, **kwargs): def _copy_from(self, source, follow_symlinks=True, preserve_metadata=False): """ Recursively copy the given path to this path. This a generator - function that yields (target, source, part_size) tuples as the copying + function that yields (target, source, sent) tuples as the copying operation progresses. """ yield self, source, 0 @@ -1143,8 +1143,8 @@ def _copy_from(self, source, follow_symlinks=True, preserve_metadata=False): if preserve_metadata: copy_info(source.info, self) else: - for part_size in self._copy_from_file(source, preserve_metadata): - yield self, source, part_size + for sent in self._copy_from_file(source, preserve_metadata): + yield self, source, sent def _copy_from_file(self, source, preserve_metadata=False): ensure_different_files(source, self) @@ -1164,8 +1164,8 @@ def _copy_from_file(self, source, preserve_metadata=False): pass else: copyfile2(source, str(self)) - return iter([]) - return self._copy_from_file_fallback(source, preserve_metadata) + return + yield from self._copy_from_file_fallback(source, preserve_metadata) if os.name == 'nt': # If a directory-symlink is copied *before* its target, then diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index 56eb9c59c239a7..87a3b4ffddef4f 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -72,15 +72,12 @@ def _copy_file_range(source_fd, target_fd): copy. This should work on Linux >= 4.5 only. """ + fn = os.copy_file_range blocksize = _get_copy_blocksize(source_fd) offset = 0 - while True: - sent = os.copy_file_range(source_fd, target_fd, blocksize, - offset_dst=offset) - if sent == 0: - break # EOF - offset += sent + while sent := fn(source_fd, target_fd, blocksize, None, offset): yield sent + offset += sent else: _copy_file_range = None @@ -91,14 +88,12 @@ def _sendfile(source_fd, target_fd): high-performance sendfile(2) syscall. This should work on Linux >= 2.6.33 only. """ + fn = os.sendfile blocksize = _get_copy_blocksize(source_fd) offset = 0 - while True: - sent = os.sendfile(target_fd, source_fd, offset, blocksize) - if sent == 0: - break # EOF - offset += sent + while sent := fn(target_fd, source_fd, offset, blocksize): yield sent + offset += sent else: _sendfile = None diff --git a/Lib/pathlib/types.py b/Lib/pathlib/types.py index b77e79dfc51d53..b4643bf56db767 100644 --- a/Lib/pathlib/types.py +++ b/Lib/pathlib/types.py @@ -414,7 +414,7 @@ def write_text(self, data, encoding=None, errors=None, newline=None): def _copy_from(self, source, follow_symlinks=True): """ Recursively copy the given path to this path. This a generator - function that yields (target, source, part_size) tuples as the copying + function that yields (target, source, sent) tuples as the copying operation progresses. """ stack = [(self, source)] @@ -432,8 +432,8 @@ def _copy_from(self, source, follow_symlinks=True): ensure_different_files(src, dst) with magic_open(src, 'rb') as source_f: with magic_open(dst, 'wb') as target_f: - for part_size in copyfileobj(source_f, target_f): - yield dst, src, part_size + for sent in copyfileobj(source_f, target_f): + yield dst, src, sent _JoinablePath.register(PurePath)