Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 59d4856

Browse files
vstinner authored and adorilson committed
bpo-42208: Add _Py_GetLocaleEncoding() (pythonGH-23050)
_io.TextIOWrapper no longer calls _bootlocale.getpreferredencoding(False) to get the locale encoding; it calls _Py_GetLocaleEncoding() instead. Add the config_get_fs_encoding() sub-function. Also reorganize the config_get_locale_encoding() code.
1 parent 9b67161 commit 59d4856
Copy full SHA for 59d4856

File tree

6 files changed

+112
-110
lines changed
Filter options

6 files changed

+112
-110
lines changed

‎Include/internal/pycore_fileutils.h

Copy file name to clipboardExpand all lines: Include/internal/pycore_fileutils.h
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
5050

5151
PyAPI_FUNC(void) _Py_closerange(int first, int last);
5252

53+
PyAPI_FUNC(PyObject*) _Py_GetLocaleEncoding(void);
54+
5355
#ifdef __cplusplus
5456
}
5557
#endif

‎Modules/_io/_iomodule.c

Copy file name to clipboardExpand all lines: Modules/_io/_iomodule.c
-25Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -593,31 +593,6 @@ _PyIO_get_module_state(void)
593593
return state;
594594
}
595595

596-
/* Return the "_bootlocale" module, using a weak reference cached in
   state->locale_module to avoid re-importing it on every call.

   On the cached path the module is Py_INCREF'ed before being returned.
   Returns NULL if the import or the creation of the weak reference fails.

   Note: every line of this function carries a "-" diff marker, i.e. this
   commit deletes it. */
PyObject *
597-
_PyIO_get_locale_module(_PyIO_State *state)
598-
{
599-
PyObject *mod;
600-
if (state->locale_module != NULL) {
601-
assert(PyWeakref_CheckRef(state->locale_module));
602-
mod = PyWeakref_GET_OBJECT(state->locale_module);
603-
/* Py_None is what the weak reference yields once its referent is gone. */
if (mod != Py_None) {
604-
Py_INCREF(mod);
605-
return mod;
606-
}
607-
/* Stale weak reference: drop it and fall through to a fresh import. */
Py_CLEAR(state->locale_module);
608-
}
609-
mod = PyImport_ImportModule("_bootlocale");
610-
if (mod == NULL)
611-
return NULL;
612-
/* Cache only a weak reference to the imported module. */
state->locale_module = PyWeakref_NewRef(mod, NULL);
613-
if (state->locale_module == NULL) {
614-
/* Could not create the cache entry: release the module and fail. */
Py_DECREF(mod);
615-
return NULL;
616-
}
617-
return mod;
618-
}
619-
620-
621596
static int
622597
iomodule_traverse(PyObject *mod, visitproc visit, void *arg) {
623598
_PyIO_State *state = get_io_state(mod);

‎Modules/_io/_iomodule.h

Copy file name to clipboardExpand all lines: Modules/_io/_iomodule.h
-1Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,6 @@ typedef struct {
150150
#define IO_STATE() _PyIO_get_module_state()
151151

152152
extern _PyIO_State *_PyIO_get_module_state(void);
153-
extern PyObject *_PyIO_get_locale_module(_PyIO_State *);
154153

155154
#ifdef MS_WINDOWS
156155
extern char _PyIO_get_console_type(PyObject *);

‎Modules/_io/textio.c

Copy file name to clipboardExpand all lines: Modules/_io/textio.c
+4-22Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "Python.h"
1111
#include "pycore_interp.h" // PyInterpreterState.fs_codec
1212
#include "pycore_long.h" // _PyLong_GetZero()
13+
#include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
1314
#include "pycore_object.h"
1415
#include "pycore_pystate.h" // _PyInterpreterState_GET()
1516
#include "structmember.h" // PyMemberDef
@@ -27,7 +28,6 @@ _Py_IDENTIFIER(_dealloc_warn);
2728
_Py_IDENTIFIER(decode);
2829
_Py_IDENTIFIER(fileno);
2930
_Py_IDENTIFIER(flush);
30-
_Py_IDENTIFIER(getpreferredencoding);
3131
_Py_IDENTIFIER(isatty);
3232
_Py_IDENTIFIER(mode);
3333
_Py_IDENTIFIER(name);
@@ -1155,29 +1155,11 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
11551155
}
11561156
}
11571157
if (encoding == NULL && self->encoding == NULL) {
1158-
PyObject *locale_module = _PyIO_get_locale_module(state);
1159-
if (locale_module == NULL)
1160-
goto catch_ImportError;
1161-
self->encoding = _PyObject_CallMethodIdOneArg(
1162-
locale_module, &PyId_getpreferredencoding, Py_False);
1163-
Py_DECREF(locale_module);
1158+
self->encoding = _Py_GetLocaleEncoding();
11641159
if (self->encoding == NULL) {
1165-
catch_ImportError:
1166-
/*
1167-
Importing locale can raise an ImportError because of
1168-
_functools, and locale.getpreferredencoding can raise an
1169-
ImportError if _locale is not available. These will happen
1170-
during module building.
1171-
*/
1172-
if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1173-
PyErr_Clear();
1174-
self->encoding = PyUnicode_FromString("ascii");
1175-
}
1176-
else
1177-
goto error;
1160+
goto error;
11781161
}
1179-
else if (!PyUnicode_Check(self->encoding))
1180-
Py_CLEAR(self->encoding);
1162+
assert(PyUnicode_Check(self->encoding));
11811163
}
11821164
if (self->encoding != NULL) {
11831165
encoding = PyUnicode_AsUTF8(self->encoding);

‎Python/fileutils.c

Copy file name to clipboardExpand all lines: Python/fileutils.c
+42-1Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "Python.h"
2-
#include "pycore_fileutils.h"
2+
#include "pycore_fileutils.h" // fileutils definitions
3+
#include "pycore_runtime.h" // _PyRuntime
34
#include "osdefs.h" // SEP
45
#include <locale.h>
56

@@ -820,6 +821,46 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str,
820821
}
821822

822823

824+
// Get the current locale encoding: locale.getpreferredencoding(False).
825+
// See also config_get_locale_encoding()
//
// Returns a Python str object naming the encoding:
// - "UTF-8" if _Py_FORCE_UTF8_LOCALE is defined or the UTF-8 Mode is enabled
//   (_PyRuntime.preconfig.utf8_mode);
// - "cp<ANSI code page>" on Windows, built from GetACP();
// - otherwise the string returned by nl_langinfo(CODESET).
//
// Returns NULL with ValueError set if nl_langinfo(CODESET) returns NULL or an
// empty string and _Py_FORCE_UTF8_FS_ENCODING is not defined. The
// PyUnicode_FromString()/PyUnicode_FromFormat() calls can also return NULL.
826+
PyObject *
827+
_Py_GetLocaleEncoding(void)
828+
{
829+
#ifdef _Py_FORCE_UTF8_LOCALE
830+
// On Android langinfo.h and CODESET are missing,
831+
// and UTF-8 is always used in mbstowcs() and wcstombs().
832+
return PyUnicode_FromString("UTF-8");
833+
#else
834+
// The UTF-8 Mode takes precedence over the platform-specific lookup below.
const PyPreConfig *preconfig = &_PyRuntime.preconfig;
835+
if (preconfig->utf8_mode) {
836+
return PyUnicode_FromString("UTF-8");
837+
}
838+
839+
#if defined(MS_WINDOWS)
840+
// Windows: name the encoding after the ANSI code page, e.g. "cp1252".
return PyUnicode_FromFormat("cp%u", GetACP());
841+
#else
842+
const char *encoding = nl_langinfo(CODESET);
843+
// A NULL result is handled the same way as an empty string.
if (!encoding || encoding[0] == '\0') {
844+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
845+
// nl_langinfo() can return an empty string when the LC_CTYPE locale is
846+
// not supported. Default to UTF-8 in that case, because UTF-8 is the
847+
// default charset on macOS.
848+
encoding = "UTF-8";
849+
#else
850+
PyErr_SetString(PyExc_ValueError,
851+
"failed to get the locale encoding: "
852+
"nl_langinfo(CODESET) returns an empty string");
853+
return NULL;
854+
#endif
855+
}
856+
// Decode the encoding name from UTF-8 (PyUnicode_FromString() expects UTF-8)
857+
return PyUnicode_FromString(encoding);
858+
#endif // !MS_WINDOWS
859+
860+
#endif // !_Py_FORCE_UTF8_LOCALE
861+
}
862+
863+
823864
#ifdef MS_WINDOWS
824865
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
825866

‎Python/initconfig.c

Copy file name to clipboardExpand all lines: Python/initconfig.c
+64-61Lines changed: 64 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -766,7 +766,7 @@ config_set_bytes_string(PyConfig *config, wchar_t **config_str,
766766
configured. */
767767
PyStatus
768768
PyConfig_SetBytesString(PyConfig *config, wchar_t **config_str,
769-
const char *str)
769+
const char *str)
770770
{
771771
return CONFIG_SET_BYTES_STR(config, config_str, str, "string");
772772
}
@@ -1466,8 +1466,13 @@ config_read_complex_options(PyConfig *config)
14661466

14671467

14681468
static const wchar_t *
1469-
config_get_stdio_errors(void)
1469+
config_get_stdio_errors(const PyPreConfig *preconfig)
14701470
{
1471+
if (preconfig->utf8_mode) {
1472+
/* UTF-8 Mode uses UTF-8/surrogateescape */
1473+
return L"surrogateescape";
1474+
}
1475+
14711476
#ifndef MS_WINDOWS
14721477
const char *loc = setlocale(LC_CTYPE, NULL);
14731478
if (loc != NULL) {
@@ -1492,26 +1497,41 @@ config_get_stdio_errors(void)
14921497
}
14931498

14941499

1500+
// See also _Py_GetLocaleEncoding() and config_get_fs_encoding()
14951501
static PyStatus
1496-
config_get_locale_encoding(PyConfig *config, wchar_t **locale_encoding)
1502+
config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
1503+
wchar_t **locale_encoding)
14971504
{
1505+
#ifdef _Py_FORCE_UTF8_LOCALE
1506+
return PyConfig_SetString(config, locale_encoding, L"utf-8");
1507+
#else
1508+
if (preconfig->utf8_mode) {
1509+
return PyConfig_SetString(config, locale_encoding, L"utf-8");
1510+
}
1511+
14981512
#ifdef MS_WINDOWS
14991513
char encoding[20];
15001514
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
15011515
return PyConfig_SetBytesString(config, locale_encoding, encoding);
1502-
#elif defined(_Py_FORCE_UTF8_LOCALE)
1503-
return PyConfig_SetString(config, locale_encoding, L"utf-8");
15041516
#else
15051517
const char *encoding = nl_langinfo(CODESET);
15061518
if (!encoding || encoding[0] == '\0') {
1519+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
1520+
// nl_langinfo() can return an empty string when the LC_CTYPE locale is
1521+
// not supported. Default to UTF-8 in that case, because UTF-8 is the
1522+
// default charset on macOS.
1523+
encoding = "UTF-8";
1524+
#else
15071525
return _PyStatus_ERR("failed to get the locale encoding: "
1508-
"nl_langinfo(CODESET) failed");
1526+
"nl_langinfo(CODESET) returns an empty string");
1527+
#endif
15091528
}
15101529
/* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
15111530
return CONFIG_SET_BYTES_STR(config,
15121531
locale_encoding, encoding,
15131532
"nl_langinfo(CODESET)");
1514-
#endif
1533+
#endif // !MS_WINDOWS
1534+
#endif // !_Py_FORCE_UTF8_LOCALE
15151535
}
15161536

15171537

@@ -1596,33 +1616,16 @@ config_init_stdio_encoding(PyConfig *config,
15961616
PyMem_RawFree(pythonioencoding);
15971617
}
15981618

1599-
/* UTF-8 Mode uses UTF-8/surrogateescape */
1600-
if (preconfig->utf8_mode) {
1601-
if (config->stdio_encoding == NULL) {
1602-
status = PyConfig_SetString(config, &config->stdio_encoding,
1603-
L"utf-8");
1604-
if (_PyStatus_EXCEPTION(status)) {
1605-
return status;
1606-
}
1607-
}
1608-
if (config->stdio_errors == NULL) {
1609-
status = PyConfig_SetString(config, &config->stdio_errors,
1610-
L"surrogateescape");
1611-
if (_PyStatus_EXCEPTION(status)) {
1612-
return status;
1613-
}
1614-
}
1615-
}
1616-
16171619
/* Choose the default error handler based on the current locale. */
16181620
if (config->stdio_encoding == NULL) {
1619-
status = config_get_locale_encoding(config, &config->stdio_encoding);
1621+
status = config_get_locale_encoding(config, preconfig,
1622+
&config->stdio_encoding);
16201623
if (_PyStatus_EXCEPTION(status)) {
16211624
return status;
16221625
}
16231626
}
16241627
if (config->stdio_errors == NULL) {
1625-
const wchar_t *errors = config_get_stdio_errors();
1628+
const wchar_t *errors = config_get_stdio_errors(preconfig);
16261629
assert(errors != NULL);
16271630

16281631
status = PyConfig_SetString(config, &config->stdio_errors, errors);
@@ -1635,46 +1638,46 @@ config_init_stdio_encoding(PyConfig *config,
16351638
}
16361639

16371640

1641+
// Choose the filesystem encoding and store it in *fs_encoding.
//
// - _Py_FORCE_UTF8_FS_ENCODING defined: always "utf-8".
// - Windows: "mbcs" if preconfig->legacy_windows_fs_encoding is set,
//   "utf-8" otherwise (preconfig->utf8_mode is not consulted here).
// - Other platforms: "utf-8" in the UTF-8 Mode, "ascii" if
//   _Py_GetForceASCII() is true, otherwise the locale encoding.
//
// Returns the PyStatus of the PyConfig_SetString() or
// config_get_locale_encoding() call that stored the value.
//
// See also config_get_locale_encoding()
1642+
static PyStatus
1643+
config_get_fs_encoding(PyConfig *config, const PyPreConfig *preconfig,
1644+
wchar_t **fs_encoding)
1645+
{
1646+
#ifdef _Py_FORCE_UTF8_FS_ENCODING
1647+
return PyConfig_SetString(config, fs_encoding, L"utf-8");
1648+
#elif defined(MS_WINDOWS)
1649+
const wchar_t *encoding;
1650+
if (preconfig->legacy_windows_fs_encoding) {
1651+
// Legacy Windows filesystem encoding: mbcs/replace
1652+
encoding = L"mbcs";
1653+
}
1654+
else {
1655+
// Windows defaults to utf-8/surrogatepass (PEP 529)
1656+
encoding = L"utf-8";
1657+
}
1658+
return PyConfig_SetString(config, fs_encoding, encoding);
1659+
#else // !MS_WINDOWS
1660+
// Priority order: UTF-8 Mode, then forced ASCII, then the locale encoding.
if (preconfig->utf8_mode) {
1661+
return PyConfig_SetString(config, fs_encoding, L"utf-8");
1662+
}
1663+
else if (_Py_GetForceASCII()) {
1664+
return PyConfig_SetString(config, fs_encoding, L"ascii");
1665+
}
1666+
else {
1667+
return config_get_locale_encoding(config, preconfig, fs_encoding);
1668+
}
1669+
#endif // !MS_WINDOWS
1670+
}
1671+
1672+
16381673
static PyStatus
16391674
config_init_fs_encoding(PyConfig *config, const PyPreConfig *preconfig)
16401675
{
16411676
PyStatus status;
16421677

16431678
if (config->filesystem_encoding == NULL) {
1644-
#ifdef _Py_FORCE_UTF8_FS_ENCODING
1645-
status = PyConfig_SetString(config, &config->filesystem_encoding, L"utf-8");
1646-
#else
1647-
1648-
#ifdef MS_WINDOWS
1649-
if (preconfig->legacy_windows_fs_encoding) {
1650-
/* Legacy Windows filesystem encoding: mbcs/replace */
1651-
status = PyConfig_SetString(config, &config->filesystem_encoding,
1652-
L"mbcs");
1653-
}
1654-
else
1655-
#endif
1656-
if (preconfig->utf8_mode) {
1657-
status = PyConfig_SetString(config, &config->filesystem_encoding,
1658-
L"utf-8");
1659-
}
1660-
#ifndef MS_WINDOWS
1661-
else if (_Py_GetForceASCII()) {
1662-
status = PyConfig_SetString(config, &config->filesystem_encoding,
1663-
L"ascii");
1664-
}
1665-
#endif
1666-
else {
1667-
#ifdef MS_WINDOWS
1668-
/* Windows defaults to utf-8/surrogatepass (PEP 529). */
1669-
status = PyConfig_SetString(config, &config->filesystem_encoding,
1670-
L"utf-8");
1671-
#else
1672-
status = config_get_locale_encoding(config,
1673-
&config->filesystem_encoding);
1674-
#endif
1675-
}
1676-
#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
1677-
1679+
status = config_get_fs_encoding(config, preconfig,
1680+
&config->filesystem_encoding);
16781681
if (_PyStatus_EXCEPTION(status)) {
16791682
return status;
16801683
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.