diff --git a/Lib/ast.py b/Lib/ast.py index 625738ad681af4..ef5ccb03bd766c 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -28,6 +28,7 @@ from _ast import * from contextlib import contextmanager, nullcontext from enum import IntEnum, auto, _simple_enum +from keyword import iskeyword def parse(source, filename='', mode='exec', *, @@ -668,6 +669,18 @@ def next(self): return self +_MANGLE_INCR = -ord('a') + ord('𝐚') + +def _mangle_keyword(x): + """If the input would be a keyword, replace the first character with a + non-ASCII character that's equivalent according to NFKC. Then it + won't be parsed as a keyword, as desired.""" + return ( + x if x in ('True', 'False', 'None') else + chr(ord(x[0]) + _MANGLE_INCR) + x[1:] if iskeyword(x) else + x) + + _SINGLE_QUOTES = ("'", '"') _MULTI_QUOTES = ('"""', "'''") _ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES) @@ -854,7 +867,7 @@ def visit_ImportFrom(self, node): self.fill("from ") self.write("." * node.level) if node.module: - self.write(node.module) + self.write(_mangle_keyword(node.module)) self.write(" import ") self.interleave(lambda: self.write(", "), self.traverse, node.names) @@ -910,13 +923,12 @@ def visit_Assert(self, node): self.write(", ") self.traverse(node.msg) - def visit_Global(self, node): - self.fill("global ") - self.interleave(lambda: self.write(", "), self.write, node.names) + def visit_Global(self, node, kw="global "): + self.fill(kw) + self.interleave(lambda: self.write(", "), self.write, map(_mangle_keyword, node.names)) def visit_Nonlocal(self, node): - self.fill("nonlocal ") - self.interleave(lambda: self.write(", "), self.write, node.names) + self.visit_Global(node, kw="nonlocal ") def visit_Await(self, node): with self.require_parens(_Precedence.AWAIT, node): @@ -992,7 +1004,7 @@ def visit_ExceptHandler(self, node): self.traverse(node.type) if node.name: self.write(" as ") - self.write(node.name) + self.write(_mangle_keyword(node.name)) with self.block(): self.traverse(node.body) @@ -1001,7 +1013,7 @@ def visit_ClassDef(self, node): for deco in node.decorator_list: self.fill("@") self.traverse(deco) - self.fill("class " + node.name) + self.fill("class " + _mangle_keyword(node.name)) with self.delimit_if("(", ")", condition = node.bases or node.keywords): comma = False for e in node.bases: @@ -1031,7 +1043,7 @@ def _function_helper(self, node, fill_suffix): for deco in node.decorator_list: self.fill("@") self.traverse(deco) - def_str = fill_suffix + " " + node.name + def_str = fill_suffix + " " + _mangle_keyword(node.name) self.fill(def_str) with self.delimit("(", ")"): self.traverse(node.args) @@ -1215,7 +1227,7 @@ def unparse_inner(inner): self._write_fstring_inner(node.format_spec) def visit_Name(self, node): - self.write(node.id) + self.write(_mangle_keyword(node.id)) def _write_docstring(self, node): self.fill() @@ -1455,7 +1467,7 @@ def visit_Attribute(self, node): if isinstance(node.value, Constant) and isinstance(node.value.value, int): self.write(" ") self.write(".") - self.write(node.attr) + self.write(_mangle_keyword(node.attr)) def visit_Call(self, node): self.set_precedence(_Precedence.ATOM, node.func) @@ -1520,7 +1532,7 @@ def visit_Match(self, node): self.traverse(case) def visit_arg(self, node): - self.write(node.arg) + self.write(_mangle_keyword(node.arg)) if node.annotation: self.write(": ") self.traverse(node.annotation) @@ -1551,7 +1563,7 @@ def visit_arguments(self, node): self.write(", ") self.write("*") if node.vararg: - self.write(node.vararg.arg) + self.write(_mangle_keyword(node.vararg.arg)) if node.vararg.annotation: self.write(": ") self.traverse(node.vararg.annotation) @@ -1571,7 +1583,7 @@ def visit_arguments(self, node): first = False else: self.write(", ") - self.write("**" + node.kwarg.arg) + self.write("**" + _mangle_keyword(node.kwarg.arg)) if node.kwarg.annotation: self.write(": ") self.traverse(node.kwarg.annotation) @@ -1580,7 +1592,7 @@ def visit_keyword(self, node): if node.arg is None: self.write("**") else: - self.write(node.arg) + self.write(_mangle_keyword(node.arg)) self.write("=") self.traverse(node.value) @@ -1596,9 +1608,9 @@ def visit_Lambda(self, node): self.traverse(node.body) def visit_alias(self, node): - self.write(node.name) + self.write(_mangle_keyword(node.name)) if node.asname: - self.write(" as " + node.asname) + self.write(" as " + _mangle_keyword(node.asname)) def visit_withitem(self, node): self.traverse(node.context_expr) @@ -1687,7 +1699,7 @@ def visit_MatchAs(self, node): with self.require_parens(_Precedence.TEST, node): self.set_precedence(_Precedence.BOR, node.pattern) self.traverse(node.pattern) - self.write(f" as {node.name}") + self.write(f" as {_mangle_keyword(node.name)}") def visit_MatchOr(self, node): with self.require_parens(_Precedence.BOR, node): diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index f5be13aa94a64c..35ed2cadcccea2 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -279,6 +279,31 @@ def test_nonlocal(self): def test_raise_from(self): self.check_ast_roundtrip(raise_from) + def test_unicode_mangled_keywords(self): + # See issue 46520 + self.check_ast_roundtrip('𝕕𝕖𝕗 = 1') + self.check_ast_roundtrip('del 𝕕𝕖𝕝') + self.check_ast_roundtrip('f(𝕕𝕖𝕗, 𝕕𝕖𝕗 = 2, *𝕕𝕖𝕗, **𝕕𝕖𝕗)') + self.check_ast_roundtrip('def 𝕕𝕖𝕗(𝕕𝕖𝕗, 𝕕𝕖𝕗 = 2, *𝕕𝕖𝕗, **𝕕𝕖𝕗): pass') + self.check_ast_roundtrip('class 𝕔𝕝𝕒𝕤𝕤: pass') + self.check_ast_roundtrip('with 𝕨𝕚𝕥𝕙 as 𝕒𝕤: pass') + self.check_ast_roundtrip('try: pass\nexcept 𝕖𝕩𝕔𝕖𝕡𝕥 as 𝕒𝕤: pass') + self.check_ast_roundtrip('import 𝕚𝕞𝕡𝕠𝕣𝕥 as 𝕒𝕤') + self.check_ast_roundtrip('from 𝕗𝕣𝕠𝕞 import 𝕚𝕞𝕡𝕠𝕣𝕥 as 𝕒𝕤') + self.check_ast_roundtrip('global 𝕘𝕝𝕠𝕓𝕒𝕝') + self.check_ast_roundtrip('nonlocal 𝕟𝕠𝕟𝕝𝕠𝕔𝕒𝕝') + self.check_ast_roundtrip('foo.𝕝𝕒𝕞𝕓𝕕𝕒') + self.check_ast_roundtrip('lambda 𝕝𝕒𝕞𝕓𝕕𝕒: 1') + self.check_ast_roundtrip('(𝕕𝕖𝕗 := 1)') + # `match` is parsed unusually, allowing ASCII keywords in many + # places. + self.check_ast_roundtrip('''match match: + case [*case]: 1 + case {**case}: 1 + case 𝕔𝕝𝕒𝕤𝕤(case = 1): 1 + case case as 𝕒𝕤: 1''' + ) + def test_bytes(self): self.check_ast_roundtrip("b'123'") diff --git a/Misc/NEWS.d/next/Library/2022-01-29-12-57-50.bpo-46520.38HC5x.rst b/Misc/NEWS.d/next/Library/2022-01-29-12-57-50.bpo-46520.38HC5x.rst new file mode 100644 index 00000000000000..a5fc78531c83da --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-01-29-12-57-50.bpo-46520.38HC5x.rst @@ -0,0 +1,2 @@ +``ast.unparse`` can now handle the result of parsing code that uses +not-quite-Python-keywords like "𝕕𝕖𝕗".