From e0b649ce2ce3e8ba255804635fbc90774047aa18 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 22 Sep 2025 15:20:28 +0200 Subject: [PATCH 1/2] gh-74857, PEP 538: Coerce POSIX locale to UTF-8 based locale --- Lib/test/test_c_locale_coercion.py | 7 ++++--- .../2025-09-22-15-21-49.gh-issue-74857.5XRQaA.rst | 2 ++ Python/pylifecycle.c | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-09-22-15-21-49.gh-issue-74857.5XRQaA.rst diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 10f8ba2255228b..0c992293c15bf8 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -15,7 +15,7 @@ # Set the list of ways we expect to be able to ask for the "C" locale. # 'invalid.ascii' is an invalid LOCALE name and so should get turned in to the # default locale, which is traditionally C. -EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "invalid.ascii"] +EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "POSIX", "invalid.ascii"] # Set our expectation for the default encoding used in the C locale # for the filesystem encoding and the standard streams @@ -467,8 +467,9 @@ def test_PYTHONCOERCECLOCALE_set_to_one(self): loc = locale.setlocale(locale.LC_CTYPE, "") except locale.Error as e: self.skipTest(str(e)) - if loc == "C": - self.skipTest("test requires LC_CTYPE locale different than C") + if loc in ("C", "POSIX"): + self.skipTest("test requires LC_CTYPE locale different " + "than C and POSIX") if loc in TARGET_LOCALES : self.skipTest("coerced LC_CTYPE locale: %s" % loc) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-22-15-21-49.gh-issue-74857.5XRQaA.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-22-15-21-49.gh-issue-74857.5XRQaA.rst new file mode 100644 index 00000000000000..820b57e920020b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-22-15-21-49.gh-issue-74857.5XRQaA.rst @@ -0,0 +1,2 @@ +:pep:`538`: Coerce the POSIX locale to a UTF-8 based locale. Patch by Victor +Stinner. diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 37af58a68d7883..185c9ae752819a 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -209,7 +209,10 @@ _Py_LegacyLocaleDetected(int warn) * we may also want to check for that explicitly. */ const char *ctype_loc = setlocale(LC_CTYPE, NULL); - return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0; + if (ctype_loc == NULL) { + return 0; + } + return (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0); #else /* Windows uses code pages instead of locales, so no locale is legacy */ return 0; From 09a2a32328000d90dbef0be71c074d92a8387dc3 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 22 Sep 2025 16:11:24 +0200 Subject: [PATCH 2/2] Update test_c_locale_coercion --- Lib/test/test_c_locale_coercion.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 0c992293c15bf8..340bec3c71b68f 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -55,11 +55,6 @@ # VxWorks defaults to using UTF-8 for all system interfaces EXPECTED_C_LOCALE_STREAM_ENCODING = "utf-8" EXPECTED_C_LOCALE_FS_ENCODING = "utf-8" -if sys.platform.startswith("linux"): - # Linux recognizes POSIX as a synonym for C. Python will always coerce - # if the locale is set to POSIX, but not all platforms will use the - # C locale encodings if POSIX is set, so we'll only test it on linux. - EXPECTED_C_LOCALE_EQUIVALENTS.append("POSIX") # Note that the above expectations are still wrong in some cases, such as: # * Windows when PYTHONLEGACYWINDOWSFSENCODING is set