From 53f02f568bfc27a649f866cf07e79e1d292a95f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 16 May 2025 12:25:21 +0200 Subject: [PATCH 1/4] ensure that we can create AF_UNIX socket files --- Lib/multiprocessing/connection.py | 2 +- Lib/multiprocessing/util.py | 36 ++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index 5f288a8d393240..fc00d2861260a8 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -76,7 +76,7 @@ def arbitrary_address(family): if family == 'AF_INET': return ('localhost', 0) elif family == 'AF_UNIX': - return tempfile.mktemp(prefix='listener-', dir=util.get_temp_dir()) + return tempfile.mktemp(prefix='sock-', dir=util.get_temp_dir()) elif family == 'AF_PIPE': return tempfile.mktemp(prefix=r'\\.\pipe\pyc-%d-%d-' % (os.getpid(), next(_mmap_counter)), dir="") diff --git a/Lib/multiprocessing/util.py b/Lib/multiprocessing/util.py index b7192042b9cf47..5ccc4b9c59bd6a 100644 --- a/Lib/multiprocessing/util.py +++ b/Lib/multiprocessing/util.py @@ -121,6 +121,21 @@ def is_abstract_socket_namespace(address): # Function returning a temp directory which will be removed on exit # +# Maximum length of a socket file path is usually between 92 and 108 [1]. +# BSD-based operating systems usually use 104 (OpenBSD, FreeBSD, macOS) +# and Linux uses 108 [2]. +# +# [1]: https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/sys_un.h.html +# [2]: https://man7.org/linux/man-pages/man7/unix.7.html. + +if sys.platform == 'linux': + _SUN_PATH_MAX = 104 +elif sys.platform.startswith(('openbsd', 'freebsd')): + _SUN_PATH_MAX = 108 +else: + # On Windows platforms, we do not create AF_UNIX sockets. + _SUN_PATH_MAX = None if os.name == 'nt' else 92 + def _remove_temp_dir(rmtree, tempdir): rmtree(tempdir) @@ -135,7 +150,26 @@ def get_temp_dir(): tempdir = process.current_process()._config.get('tempdir') if tempdir is None: import shutil, tempfile - tempdir = tempfile.mkdtemp(prefix='pymp-') + if os.name == 'nt': + tempdir = tempfile.mkdtemp(prefix='pymp-') + else: + # Most of the time, the root temporary directory is /tmp, and thus + # listener sockets files "$TMPDIR/pymp-XXXXXXXX/sock-XXXXXXXX" + # do not have a path length exceeding SUN_PATH_MAX. + # + # If users specify their own temporary directory, we may be unable + # to create those files. Therefore, we fall back to the system-wide + # temporary directory /tmp, assumed to exist on POSIX systems. + # + # See https://github.com/python/cpython/issues/132124. + base_tempdir = tempfile.gettempdir() + # len(base_tempdir) + len('/pymp-XXXXXXXX') + len('/sock-XXXXXXXX') + sun_path_len = len(base_tempdir) + 14 + 14 + if sun_path_len > _SUN_PATH_MAX: + # fallback to the system-wide temporary directory, + # ignoring environment variables. + base_tempdir = '/tmp' + tempdir = tempfile.mkdtemp(prefix='pymp-', dir=base_tempdir) info('created temp directory %s', tempdir) # keep a strong reference to shutil.rmtree(), since the finalizer # can be called late during Python shutdown From 453fc7bd68b867f1989113a2de55af07996d3f0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 16 May 2025 12:40:41 +0200 Subject: [PATCH 2/4] blurb --- .../Library/2025-05-16-12-40-37.gh-issue-132124.T_5Odx.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-05-16-12-40-37.gh-issue-132124.T_5Odx.rst diff --git a/Misc/NEWS.d/next/Library/2025-05-16-12-40-37.gh-issue-132124.T_5Odx.rst b/Misc/NEWS.d/next/Library/2025-05-16-12-40-37.gh-issue-132124.T_5Odx.rst new file mode 100644 index 00000000000000..acf3577ece4e9c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-16-12-40-37.gh-issue-132124.T_5Odx.rst @@ -0,0 +1,6 @@ +On POSIX-compliant systems, :func:`!multiprocessing.util.get_temp_dir` now +ignores :envvar:`TMPDIR` (and similar environment variables) if the path +length of ``AF_UNIX`` socket files exceeds the platform-specific maximum +length when using the :ref:`forkserver +` start method. Patch by Bénédikt +Tran. From 84f7020eaaae80fb9064af009f796b25783565d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 17 May 2025 12:09:22 +0200 Subject: [PATCH 3/4] emit a warning if system-wide temporary directory is used --- Lib/multiprocessing/util.py | 93 ++++++++++++++++++++++++++----------- Lib/tempfile.py | 5 +- 2 files changed, 70 insertions(+), 28 deletions(-) diff --git a/Lib/multiprocessing/util.py b/Lib/multiprocessing/util.py index 5ccc4b9c59bd6a..eda1a76e3dc68f 100644 --- a/Lib/multiprocessing/util.py +++ b/Lib/multiprocessing/util.py @@ -19,7 +19,7 @@ from . import process __all__ = [ - 'sub_debug', 'debug', 'info', 'sub_warning', 'get_logger', + 'sub_debug', 'debug', 'info', 'sub_warning', 'warn', 'get_logger', 'log_to_stderr', 'get_temp_dir', 'register_after_fork', 'is_exiting', 'Finalize', 'ForkAwareThreadLock', 'ForkAwareLocal', 'close_all_fds_except', 'SUBDEBUG', 'SUBWARNING', @@ -34,6 +34,7 @@ DEBUG = 10 INFO = 20 SUBWARNING = 25 +WARNING = 30 LOGGER_NAME = 'multiprocessing' DEFAULT_LOGGING_FORMAT = '[%(levelname)s/%(processName)s] %(message)s' @@ -53,6 +54,10 @@ def info(msg, *args): if _logger: _logger.log(INFO, msg, *args, stacklevel=2) +def warn(msg, *args): + if _logger: + _logger.log(WARNING, msg, *args, stacklevel=2) + def sub_warning(msg, *args): if _logger: _logger.log(SUBWARNING, msg, *args, stacklevel=2) @@ -121,17 +126,17 @@ def is_abstract_socket_namespace(address): # Function returning a temp directory which will be removed on exit # -# Maximum length of a socket file path is usually between 92 and 108 [1]. -# BSD-based operating systems usually use 104 (OpenBSD, FreeBSD, macOS) -# and Linux uses 108 [2]. +# Maximum length of a socket file path is usually between 92 and 108 [1], +# but Linux is known to use a size of 108 [2]. BSD-based systems usually +# use a size of 104 or 108 and Windows does not create AF_UNIX sockets. # # [1]: https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/sys_un.h.html # [2]: https://man7.org/linux/man-pages/man7/unix.7.html. if sys.platform == 'linux': - _SUN_PATH_MAX = 104 -elif sys.platform.startswith(('openbsd', 'freebsd')): _SUN_PATH_MAX = 108 +elif sys.platform.startswith(('openbsd', 'freebsd')): + _SUN_PATH_MAX = 104 else: # On Windows platforms, we do not create AF_UNIX sockets. _SUN_PATH_MAX = None if os.name == 'nt' else 92 @@ -145,31 +150,67 @@ def _remove_temp_dir(rmtree, tempdir): if current_process is not None: current_process._config['tempdir'] = None +def _get_base_temp_dir(tempfile): + """Get a temporary directory where socket files will be created. + + To prevent additional imports, pass a pre-imported 'tempfile' module. + """ + if os.name == 'nt': + return None + # Most of the time, the default temporary directory is /tmp. Thus, + # listener sockets files "$TMPDIR/pymp-XXXXXXXX/sock-XXXXXXXX" do + # not have a path length exceeding SUN_PATH_MAX. + # + # If users specify their own temporary directory, we may be unable + # to create those files. Therefore, we fall back to the system-wide + # temporary directory /tmp, assumed to exist on POSIX systems. + # + # See https://github.com/python/cpython/issues/132124. + base_tempdir = tempfile.gettempdir() + # Files created in a temporary directory are suffixed by a string + # generated by tempfile._RandomNameSequence, which, by design, + # is 8 characters long. + # + # Thus, the length of socket filename will be: + # + # len(base_tempdir + '/pymp-XXXXXXXX' + '/sock-XXXXXXXX') + sun_path_len = len(base_tempdir) + 14 + 14 + if sun_path_len <= _SUN_PATH_MAX: + return base_tempdir + # Fallback to the default system-wide temporary directory. + # This ignores user-defined environment variables. + # + # On POSIX systems, /tmp MUST be writable by any application [1]. + # We however emit a warning if this is not the case to prevent + # obscure errors later in the execution. + # + # On some legacy systems, /var/tmp and /usr/tmp can be present + # and will be used instead. + # + # [1]: https://refspecs.linuxfoundation.org/FHS_3.0/fhs/ch03s18.html + dirlist = ['/tmp', '/var/tmp', '/usr/tmp'] + try: + base_system_tempdir = tempfile._get_default_tempdir(dirlist) + except FileNotFoundError: + warn("Process-wide temporary directory %r will not be usable for " + "creating socket files and no usable system-wide temporary " + "directory was found in %r", base_tempdir, dirlist) + # At this point, the system-wide temporary directory is not usable + # but we may assume that the user-defined one is, even if we will + # not be able to write socket files out there. + return base_tempdir + warn("Ignoring user-defined temporary directory: %s", base_tempdir) + # at most max(map(len, dirlist)) + 14 + 14 = 36 characters + assert len(base_system_tempdir) + 14 + 14 <= _SUN_PATH_MAX + return base_system_tempdir + def get_temp_dir(): # get name of a temp directory which will be automatically cleaned up tempdir = process.current_process()._config.get('tempdir') if tempdir is None: import shutil, tempfile - if os.name == 'nt': - tempdir = tempfile.mkdtemp(prefix='pymp-') - else: - # Most of the time, the root temporary directory is /tmp, and thus - # listener sockets files "$TMPDIR/pymp-XXXXXXXX/sock-XXXXXXXX" - # do not have a path length exceeding SUN_PATH_MAX. - # - # If users specify their own temporary directory, we may be unable - # to create those files. Therefore, we fall back to the system-wide - # temporary directory /tmp, assumed to exist on POSIX systems. - # - # See https://github.com/python/cpython/issues/132124. - base_tempdir = tempfile.gettempdir() - # len(base_tempdir) + len('/pymp-XXXXXXXX') + len('/sock-XXXXXXXX') - sun_path_len = len(base_tempdir) + 14 + 14 - if sun_path_len > _SUN_PATH_MAX: - # fallback to the system-wide temporary directory, - # ignoring environment variables. - base_tempdir = '/tmp' - tempdir = tempfile.mkdtemp(prefix='pymp-', dir=base_tempdir) + base_tempdir = _get_base_temp_dir(tempfile) + tempdir = tempfile.mkdtemp(prefix='pymp-', dir=base_tempdir) info('created temp directory %s', tempdir) # keep a strong reference to shutil.rmtree(), since the finalizer # can be called late during Python shutdown diff --git a/Lib/tempfile.py b/Lib/tempfile.py index cadb0bed3cce3b..5e3ccab5f48502 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -180,7 +180,7 @@ def _candidate_tempdir_list(): return dirlist -def _get_default_tempdir(): +def _get_default_tempdir(dirlist=None): """Calculate the default directory to use for temporary files. This routine should be called exactly once. @@ -190,7 +190,8 @@ def _get_default_tempdir(): service, the name of the test file must be randomized.""" namer = _RandomNameSequence() - dirlist = _candidate_tempdir_list() + if dirlist is None: + dirlist = _candidate_tempdir_list() for dir in dirlist: if dir != _os.curdir: From 4c3de67be62333aee343b94c125035380c0dbbd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 17 May 2025 12:19:12 +0200 Subject: [PATCH 4/4] use '%s' consistently --- Lib/multiprocessing/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/multiprocessing/util.py b/Lib/multiprocessing/util.py index eda1a76e3dc68f..a1a537dd48dea7 100644 --- a/Lib/multiprocessing/util.py +++ b/Lib/multiprocessing/util.py @@ -192,9 +192,9 @@ def _get_base_temp_dir(tempfile): try: base_system_tempdir = tempfile._get_default_tempdir(dirlist) except FileNotFoundError: - warn("Process-wide temporary directory %r will not be usable for " + warn("Process-wide temporary directory %s will not be usable for " "creating socket files and no usable system-wide temporary " - "directory was found in %r", base_tempdir, dirlist) + "directory was found in %s", base_tempdir, dirlist) # At this point, the system-wide temporary directory is not usable # but we may assume that the user-defined one is, even if we will # not be able to write socket files out there.