python · vedant713 · Jul 14, 2025 · Jul 14, 2025 · Jul 14, 2025
@@ -40,6 +40,12 @@
 # syntax classes
 SYNTAX_WHITESPACE, SYNTAX_WORD, SYNTAX_SYMBOL = range(3)

def normalize_surrogates(s: str) -> str:
    """Return *s* with UTF-16 surrogate pairs merged into single code points.

    A high surrogate immediately followed by a low surrogate (for example
    U+D83D U+DE00 stored as two separate code points) is replaced by the
    one character the pair encodes (U+1F600).  Text that contains no
    surrogates is returned unchanged, and unpaired surrogates are kept
    as they are rather than raising.
    """
    # Round-trip through UTF-16: the decoder joins every valid pair into a
    # single code point.  "surrogatepass" is required in both directions:
    # on encode so surrogate code points are accepted at all, and on decode
    # so an unpaired surrogate passes through instead of raising
    # UnicodeDecodeError.  The explicit little-endian codec avoids emitting
    # and then re-parsing a byte order mark.
    return s.encode("utf-16-le", "surrogatepass").decode(
        "utf-16-le", "surrogatepass"
    )

 def make_default_syntax_table() -> dict[str, int]:
    # XXX perhaps should use some unicodedata here?
@@ -759,4 +765,5 @@ def bind(self, spec: KeySpec, command: CommandName) -> None:

    def get_unicode(self) -> str:
        """Join the buffer into one string and normalize its surrogates."""
        return normalize_surrogates("".join(self.buffer))
diff --git a/Misc/NEWS.d/next/Windows/2025-07-14-01-27-42.gh-issue-136595.964PbL.rst b/Misc/NEWS.d/next/Windows/2025-07-14-01-27-42.gh-issue-136595.964PbL.rst
@@ -0,0 +1 @@
+Fix a crash in the REPL on Windows when typing Unicode characters outside the Basic Multilingual Plane (≥ U+10000), such as emoji. Surrogate pairs in the input buffer are now combined into the single characters they encode.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fix a crash in the REPL on Windows when typing Unicode characters outside the Basic Multilingual Plane (≥ U+10000), such as emoji. Surrogate pairs in the input buffer are now combined into the single characters they encode.