Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions 15 Doc/c-api/sys.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ Operating System Utilities
Encoding, highest priority to lowest priority:

* ``UTF-8`` on macOS and Android;
* ``UTF-8`` on Windows if :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero;
* ``UTF-8`` if the Python UTF-8 mode is enabled;
* ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
Expand Down Expand Up @@ -140,6 +141,10 @@ Operating System Utilities
.. versionchanged:: 3.7
The function now uses the UTF-8 encoding in the UTF-8 mode.

.. versionchanged:: 3.8
The function now uses the UTF-8 encoding on Windows if
:c:data:`Py_LegacyWindowsFSEncodingFlag` is zero.


.. c:function:: char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos)

Expand All @@ -150,6 +155,7 @@ Operating System Utilities
Encoding, highest priority to lowest priority:

* ``UTF-8`` on macOS and Android;
* ``UTF-8`` on Windows if :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero;
* ``UTF-8`` if the Python UTF-8 mode is enabled;
* ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
Expand All @@ -169,9 +175,6 @@ Operating System Utilities
Use the :c:func:`Py_DecodeLocale` function to decode the bytes string back
to a wide character string.

.. versionchanged:: 3.7
The function now uses the UTF-8 encoding in the UTF-8 mode.

.. seealso::

The :c:func:`PyUnicode_EncodeFSDefault` and
Expand All @@ -180,7 +183,11 @@ Operating System Utilities
.. versionadded:: 3.5

.. versionchanged:: 3.7
The function now supports the UTF-8 mode.
The function now uses the UTF-8 encoding in the UTF-8 mode.

.. versionchanged:: 3.8
The function now uses the UTF-8 encoding on Windows if
:c:data:`Py_LegacyWindowsFSEncodingFlag` is zero.


.. _systemfunctions:
Expand Down
39 changes: 19 additions & 20 deletions 39 Lib/test/test_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,10 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'dump_refs': 0,
'malloc_stats': 0,

# None means that the default encoding is read at runtime:
# see get_locale_encoding().
# None means that the value is obtained by get_filesystem_encoding()
'filesystem_encoding': None,
'filesystem_errors': sys.getfilesystemencodeerrors(),
'filesystem_errors': None,

'utf8_mode': 0,
'coerce_c_locale': 0,
'coerce_c_locale_warn': 0,
Expand All @@ -294,7 +294,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'quiet': 0,
'user_site_directory': 1,
'buffered_stdio': 1,
# None means that check_config() gets the expected encoding at runtime

# None means that the value is obtained by get_stdio_encoding()
'stdio_encoding': None,
'stdio_errors': None,

Expand All @@ -303,7 +304,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'_frozen': 0,
}


def get_stdio_encoding(self, env):
code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
args = (sys.executable, '-c', code)
Expand All @@ -315,18 +315,12 @@ def get_stdio_encoding(self, env):
out = proc.stdout.rstrip()
return out.split()

def get_locale_encoding(self, isolated):
if sys.platform in ('win32', 'darwin') or support.is_android:
# Windows, macOS and Android use UTF-8
return "utf-8"

code = ('import codecs, locale, sys',
'locale.setlocale(locale.LC_CTYPE, "")',
'enc = locale.nl_langinfo(locale.CODESET)',
'enc = codecs.lookup(enc).name',
'print(enc)')
args = (sys.executable, '-c', '; '.join(code))
env = dict(os.environ)
def get_filesystem_encoding(self, isolated, env):
code = ('import codecs, locale, sys; '
'print(sys.getfilesystemencoding(), '
'sys.getfilesystemencodeerrors())')
args = (sys.executable, '-c', code)
env = dict(env)
if not isolated:
env['PYTHONCOERCECLOCALE'] = '0'
env['PYTHONUTF8'] = '0'
Expand All @@ -336,7 +330,8 @@ def get_locale_encoding(self, isolated):
if proc.returncode:
raise Exception(f"failed to get the locale encoding: "
f"stdout={proc.stdout!r} stderr={proc.stderr!r}")
return proc.stdout.rstrip()
out = proc.stdout.rstrip()
return out.split()

def check_config(self, testname, expected):
expected = dict(self.DEFAULT_CONFIG, **expected)
Expand All @@ -356,8 +351,12 @@ def check_config(self, testname, expected):
expected['stdio_encoding'] = res[0]
if expected['stdio_errors'] is None:
expected['stdio_errors'] = res[1]
if expected['filesystem_encoding'] is None:
expected['filesystem_encoding'] = self.get_locale_encoding(expected['isolated'])
if expected['filesystem_encoding'] is None or expected['filesystem_errors'] is None:
res = self.get_filesystem_encoding(expected['isolated'], env)
if expected['filesystem_encoding'] is None:
expected['filesystem_encoding'] = res[0]
if expected['filesystem_errors'] is None:
expected['filesystem_errors'] = res[1]
for key, value in expected.items():
expected[key] = str(value)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Py_DecodeLocale() and Py_EncodeLocale() now use the UTF-8 encoding on
Windows if Py_LegacyWindowsFSEncodingFlag is zero.
13 changes: 11 additions & 2 deletions 13 Modules/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1287,6 +1287,9 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
_PyCmdline *cmdline)
{
int init_utf8_mode = Py_UTF8Mode;
#ifdef MS_WINDOWS
int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
#endif
_PyCoreConfig save_config = _PyCoreConfig_INIT;
int res = -1;

Expand All @@ -1313,9 +1316,12 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
goto done;
}

/* bpo-34207: Py_DecodeLocale(), Py_EncodeLocale() and similar
functions depend on Py_UTF8Mode. */
/* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
Py_UTF8Mode = config->utf8_mode;
#ifdef MS_WINDOWS
Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
#endif

if (pymain_init_cmdline_argv(pymain, config, cmdline) < 0) {
goto done;
Expand Down Expand Up @@ -1380,6 +1386,9 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
done:
_PyCoreConfig_Clear(&save_config);
Py_UTF8Mode = init_utf8_mode ;
#ifdef MS_WINDOWS
Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
#endif
return res;
}

Expand Down
16 changes: 12 additions & 4 deletions 16 Python/fileutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,13 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
surrogateescape);
#else
if (Py_UTF8Mode == 1) {
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
surrogateescape);
int use_utf8 = (Py_UTF8Mode == 1);
#ifdef MS_WINDOWS
use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
#endif
if (use_utf8) {
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen,
reason, surrogateescape);
}

#ifdef USE_FORCE_ASCII
Expand Down Expand Up @@ -661,7 +665,11 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
raw_malloc, surrogateescape);
#else /* __APPLE__ */
if (Py_UTF8Mode == 1) {
int use_utf8 = (Py_UTF8Mode == 1);
#ifdef MS_WINDOWS
use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
#endif
if (use_utf8) {
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
raw_malloc, surrogateescape);
}
Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.