40
40
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
41
41
42
42
43
def _get_windows_ansi_encoding():
    """Get the encoding specified by the Windows system-wide ANSI active code page.

    The code page identifier is read from the "ACP" value of the NLS CodePage key
    under HKEY_LOCAL_MACHINE, and returned as a codec name of the form "cp<ACP>",
    with no surrounding whitespace.

    No registry errors are caught here; anything winreg raises propagates to the
    caller.
    """
    # locale.getencoding may work but is only in Python 3.11+. Use the registry instead.
    # The import is kept local to the function rather than at module level.
    import winreg

    hklm_path = R"SYSTEM\CurrentControlSet\Control\Nls\CodePage"
    with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, hklm_path) as key:
        value, _ = winreg.QueryValueEx(key, "ACP")
    # Build the name without padding, so it is exactly "cp" + the ACP value.
    return f"cp{value}"
52
+
53
+
43
54
@sumtype
44
55
class WinBashStatus :
45
56
"""Status of bash.exe for native Windows. Affects which commit hook tests can pass.
46
57
47
- Call :meth:`check` to check the status.
48
-
49
- The :class:`CheckError` and :class:`WinError` cases should not typically be used in
50
- ``skip`` or ``xfail`` mark conditions, because they represent unexpected situations.
58
+ Call check() to check the status. (CheckError and WinError should not typically be
59
+ used to trigger skip or xfail, because they represent unexpected situations.)
51
60
"""
52
61
53
62
Inapplicable = constructor ()
54
63
"""This system is not native Windows: either not Windows at all, or Cygwin."""
55
64
56
65
Absent = constructor ()
57
- """No command for `` bash.exe`` is found on the system."""
66
+ """No command for bash.exe is found on the system."""
58
67
59
68
Native = constructor ()
60
- """Running `` bash.exe`` operates outside any WSL distribution (as with Git Bash)."""
69
+ """Running bash.exe operates outside any WSL distribution (as with Git Bash)."""
61
70
62
71
Wsl = constructor ()
63
- """Running `` bash.exe`` calls `` bash`` in a WSL distribution."""
72
+ """Running bash.exe calls bash in a WSL distribution."""
64
73
65
74
WslNoDistro = constructor ("process" , "message" )
66
- """Running `` bash.exe` tries to run bash on a WSL distribution, but none exists."""
75
+ """Running bash.exe tries to run bash on a WSL distribution, but none exists."""
67
76
68
77
CheckError = constructor ("process" , "message" )
69
- """Running `` bash.exe`` fails in an unexpected error or gives unexpected output."""
78
+ """Running bash.exe fails in an unexpected error or gives unexpected output."""
70
79
71
80
WinError = constructor ("exception" )
72
- """`` bash.exe`` may exist but can't run. `` CreateProcessW`` fails unexpectedly."""
81
+ """bash.exe may exist but can't run. CreateProcessW fails unexpectedly."""
73
82
74
83
@classmethod
75
84
def check (cls ):
76
- """Check the status of the ``bash.exe`` :func:`index.fun.run_commit_hook` uses.
77
-
78
- This uses EAFP, attempting to run a command via ``bash.exe``. Which ``bash.exe``
79
- is used can't be reliably discovered by :func:`shutil.which`, which approximates
80
- how a shell is expected to search for an executable. On Windows, there are major
81
- differences between how executables are found by a shell and otherwise. (This is
82
- the cmd.exe Windows shell, and shouldn't be confused with bash.exe itself. That
83
- the command being looked up also happens to be an interpreter is not relevant.)
84
-
85
- :func:`index.fun.run_commit_hook` uses :class:`subprocess.Popen`, including when
86
- it runs ``bash.exe`` on Windows. It doesn't pass ``shell=True`` (and shouldn't).
87
- On Windows, `Popen` calls ``CreateProcessW``, which checks some locations before
88
- using the ``PATH`` environment variable. It is expected to try the ``System32``
89
- directory, even if another directory containing the executable precedes it in
90
- ``PATH``. (The other differences are less relevant here.) When WSL is present,
91
- even with no distributions, ``bash.exe`` usually exists in ``System32``, and
92
- `Popen` finds it even if another ``bash.exe`` precedes it in ``PATH``, as on CI.
93
- If WSL is absent, ``System32`` may still have ``bash.exe``, as Windows users and
94
- administrators occasionally put executables there in lieu of extending ``PATH``.
85
+ """Check the status of the bash.exe that run_commit_hook will try to use.
86
+
87
+ This runs a command with bash.exe and checks the result. On Windows, shell and
88
+ non-shell executable search differ; shutil.which often finds the wrong bash.exe.
89
+
90
+ run_commit_hook uses Popen, including to run bash.exe on Windows. It doesn't
91
+ pass shell=True (and shouldn't). On Windows, Popen calls CreateProcessW, which
92
+ checks some locations before using the PATH environment variable. It is expected
93
+ to try System32, even if another directory with the executable precedes it in
94
+ PATH. When WSL is present, even with no distributions, bash.exe usually exists
95
+ in System32; Popen finds it even if a shell would run another one, as on CI.
96
+ (Without WSL, System32 may still have bash.exe; users sometimes put it there.)
95
97
"""
96
98
if os .name != "nt" :
97
99
return cls .Inapplicable ()
@@ -124,39 +126,35 @@ def check(cls):
124
126
125
127
@staticmethod
def _decode(stdout):
    """Decode bash.exe output as best we can.

    Takes the raw bytes from bash.exe's standard output and returns them as text.
    Three decodings are attempted in order: UTF-16LE if the bytes contain a
    UTF-16LE-encoded Windows line ending, then the system ANSI code page, and
    finally UTF-8 with replacement characters for anything undecodable.
    """
    # When bash.exe is the WSL wrapper but the output is from WSL itself rather than
    # code running in a distribution, the output is often in UTF-16LE, which Windows
    # uses internally. The UTF-16LE representation of a Windows-style line ending is
    # rarely seen otherwise, so use it to detect this situation. The pattern must be
    # exactly CR, NUL, LF, NUL with nothing in between.
    if b"\r\0\n\0" in stdout:
        return stdout.decode("utf-16le")

    # At this point, the output is either blank or probably not UTF-16LE. It's often
    # UTF-8 from inside a WSL distro or non-WSL bash shell. Our test command only
    # uses the ASCII subset, so we can safely guess a wrong code page for it. Errors
    # from such an environment can contain any text, but unlike WSL's own messages,
    # they go to stderr, not stdout. So we can try the system ANSI code page first.
    # (Console programs often use the OEM code page, but the ACP seems more accurate
    # here. For example, on en-US Windows with the original system code page but the
    # display language set to fr-FR, the message, if not UTF-16LE, is windows-1252,
    # same as the ACP, while the OEMCP is 437, which can't decode its accents.)
    acp = _get_windows_ansi_encoding()
    try:
        return stdout.decode(acp)
    except UnicodeDecodeError:
        # Not valid in the ANSI code page; fall through to the UTF-8 attempt below.
        pass
    except LookupError as error:
        log.warning("%s", str(error))  # Message already says "Unknown encoding:".

    # Assume UTF-8. If invalid, substitute Unicode replacement characters. (For
    # example, on zh-CN Windows set to display fr-FR, errors from WSL itself, if not
    # UTF-16LE, are in windows-1252, even though the ANSI and OEM code pages both
    # default to 936, and decoding as code page 936 or as UTF-8 both have errors.)
    return stdout.decode("utf-8", errors="replace")
161
159
162
160
0 commit comments