From cbe9fa64d608162d551f3868a86883a2b4b1d21d Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sat, 29 Jan 2022 11:44:36 -0500 Subject: [PATCH 1/6] Factor out common parts of `visit_Global` and `visit_Nonlocal` --- Lib/ast.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index 625738ad681af4..807d26f4887053 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -910,13 +910,12 @@ def visit_Assert(self, node): self.write(", ") self.traverse(node.msg) - def visit_Global(self, node): - self.fill("global ") + def visit_Global(self, node, kw="global "): + self.fill(kw) self.interleave(lambda: self.write(", "), self.write, node.names) def visit_Nonlocal(self, node): - self.fill("nonlocal ") - self.interleave(lambda: self.write(", "), self.write, node.names) + self.visit_Global(node, kw="nonlocal ") def visit_Await(self, node): with self.require_parens(_Precedence.AWAIT, node): From 3ef4cec0e157a5e5b1baab740cb3cb07baa07605 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sat, 29 Jan 2022 13:05:18 -0500 Subject: [PATCH 2/6] Mangle keywords in `ast.Name` --- Lib/ast.py | 15 ++++++++++++++- Lib/test/test_unparse.py | 4 ++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Lib/ast.py b/Lib/ast.py index 807d26f4887053..b0584de918e710 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -28,6 +28,7 @@ from _ast import * from contextlib import contextmanager, nullcontext from enum import IntEnum, auto, _simple_enum +import keyword def parse(source, filename='', mode='exec', *, @@ -668,6 +669,18 @@ def next(self): return self +_MANGLE_INCR = -ord('a') + ord('𝐚') + +def _mangle_keyword(x): + """If the input would be a keyword, replace the first character with a + non-ASCII character that's equivalent according to NFKC. 
Then it + won't be parsed as a keyword, as desired.""" + return ( + x if x in ('True', 'False', 'None') else + chr(ord(x[0]) + _MANGLE_INCR) + x[1:] if keyword.iskeyword(x) else + x) + + _SINGLE_QUOTES = ("'", '"') _MULTI_QUOTES = ('"""', "'''") _ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES) @@ -1214,7 +1227,7 @@ def unparse_inner(inner): self._write_fstring_inner(node.format_spec) def visit_Name(self, node): - self.write(node.id) + self.write(_mangle_keyword(node.id)) def _write_docstring(self, node): self.fill() diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index f5be13aa94a64c..42254b89f74132 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -279,6 +279,10 @@ def test_nonlocal(self): def test_raise_from(self): self.check_ast_roundtrip(raise_from) + def test_unicode_mangled_keywords(self): + # See issue 46520 + self.check_ast_roundtrip('𝕕𝕖𝕗 = 1') + def test_bytes(self): self.check_ast_roundtrip("b'123'") From d7fb0fdaa834890bb7ba5d83313877db6e9e2029 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sat, 29 Jan 2022 12:46:50 -0500 Subject: [PATCH 3/6] Call `_mangle_keyword` in more places --- Lib/ast.py | 24 ++++++++++++------------ Lib/test/test_unparse.py | 13 +++++++++++++ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index b0584de918e710..ccbd599d1bbff4 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -867,7 +867,7 @@ def visit_ImportFrom(self, node): self.fill("from ") self.write("." 
* node.level) if node.module: - self.write(node.module) + self.write(_mangle_keyword(node.module)) self.write(" import ") self.interleave(lambda: self.write(", "), self.traverse, node.names) @@ -925,7 +925,7 @@ def visit_Assert(self, node): def visit_Global(self, node, kw="global "): self.fill(kw) - self.interleave(lambda: self.write(", "), self.write, node.names) + self.interleave(lambda: self.write(", "), self.write, map(_mangle_keyword, node.names)) def visit_Nonlocal(self, node): self.visit_Global(node, kw="nonlocal ") @@ -1004,7 +1004,7 @@ def visit_ExceptHandler(self, node): self.traverse(node.type) if node.name: self.write(" as ") - self.write(node.name) + self.write(_mangle_keyword(node.name)) with self.block(): self.traverse(node.body) @@ -1013,7 +1013,7 @@ def visit_ClassDef(self, node): for deco in node.decorator_list: self.fill("@") self.traverse(deco) - self.fill("class " + node.name) + self.fill("class " + _mangle_keyword(node.name)) with self.delimit_if("(", ")", condition = node.bases or node.keywords): comma = False for e in node.bases: @@ -1043,7 +1043,7 @@ def _function_helper(self, node, fill_suffix): for deco in node.decorator_list: self.fill("@") self.traverse(deco) - def_str = fill_suffix + " " + node.name + def_str = fill_suffix + " " + _mangle_keyword(node.name) self.fill(def_str) with self.delimit("(", ")"): self.traverse(node.args) @@ -1467,7 +1467,7 @@ def visit_Attribute(self, node): if isinstance(node.value, Constant) and isinstance(node.value.value, int): self.write(" ") self.write(".") - self.write(node.attr) + self.write(_mangle_keyword(node.attr)) def visit_Call(self, node): self.set_precedence(_Precedence.ATOM, node.func) @@ -1532,7 +1532,7 @@ def visit_Match(self, node): self.traverse(case) def visit_arg(self, node): - self.write(node.arg) + self.write(_mangle_keyword(node.arg)) if node.annotation: self.write(": ") self.traverse(node.annotation) @@ -1563,7 +1563,7 @@ def visit_arguments(self, node): self.write(", ") 
self.write("*") if node.vararg: - self.write(node.vararg.arg) + self.write(_mangle_keyword(node.vararg.arg)) if node.vararg.annotation: self.write(": ") self.traverse(node.vararg.annotation) @@ -1583,7 +1583,7 @@ def visit_arguments(self, node): first = False else: self.write(", ") - self.write("**" + node.kwarg.arg) + self.write("**" + _mangle_keyword(node.kwarg.arg)) if node.kwarg.annotation: self.write(": ") self.traverse(node.kwarg.annotation) @@ -1592,7 +1592,7 @@ def visit_keyword(self, node): if node.arg is None: self.write("**") else: - self.write(node.arg) + self.write(_mangle_keyword(node.arg)) self.write("=") self.traverse(node.value) @@ -1608,9 +1608,9 @@ def visit_Lambda(self, node): self.traverse(node.body) def visit_alias(self, node): - self.write(node.name) + self.write(_mangle_keyword(node.name)) if node.asname: - self.write(" as " + node.asname) + self.write(" as " + _mangle_keyword(node.asname)) def visit_withitem(self, node): self.traverse(node.context_expr) diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 42254b89f74132..45fad9d60fb622 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -282,6 +282,19 @@ def test_raise_from(self): def test_unicode_mangled_keywords(self): # See issue 46520 self.check_ast_roundtrip('𝕕𝕖𝕗 = 1') + self.check_ast_roundtrip('del 𝕕𝕖𝕝') + self.check_ast_roundtrip('f(𝕕𝕖𝕗, 𝕕𝕖𝕗 = 2, *𝕕𝕖𝕗, **𝕕𝕖𝕗)') + self.check_ast_roundtrip('def 𝕕𝕖𝕗(𝕕𝕖𝕗, 𝕕𝕖𝕗 = 2, *𝕕𝕖𝕗, **𝕕𝕖𝕗): pass') + self.check_ast_roundtrip('class 𝕔𝕝𝕒𝕤𝕤: pass') + self.check_ast_roundtrip('with 𝕨𝕚𝕥𝕙 as 𝕒𝕤: pass') + self.check_ast_roundtrip('try: pass\nexcept 𝕖𝕩𝕔𝕖𝕡𝕥 as 𝕒𝕤: pass') + self.check_ast_roundtrip('import 𝕚𝕞𝕡𝕠𝕣𝕥 as 𝕒𝕤') + self.check_ast_roundtrip('from 𝕗𝕣𝕠𝕞 import 𝕚𝕞𝕡𝕠𝕣𝕥 as 𝕒𝕤') + self.check_ast_roundtrip('global 𝕘𝕝𝕠𝕓𝕒𝕝') + self.check_ast_roundtrip('nonlocal 𝕟𝕠𝕟𝕝𝕠𝕔𝕒𝕝') + self.check_ast_roundtrip('foo.𝕝𝕒𝕞𝕓𝕕𝕒') + self.check_ast_roundtrip('lambda 𝕝𝕒𝕞𝕓𝕕𝕒: 1') + self.check_ast_roundtrip('(𝕕𝕖𝕗 := 1)') def test_bytes(self): 
self.check_ast_roundtrip("b'123'") From 65bb8e82f3bfa1a859019c9d8992f8fd85c44267 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sat, 29 Jan 2022 12:39:11 -0500 Subject: [PATCH 4/6] Mangle some keywords in `case` --- Lib/ast.py | 2 +- Lib/test/test_unparse.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Lib/ast.py b/Lib/ast.py index ccbd599d1bbff4..bb6eebc64a77ed 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1699,7 +1699,7 @@ def visit_MatchAs(self, node): with self.require_parens(_Precedence.TEST, node): self.set_precedence(_Precedence.BOR, node.pattern) self.traverse(node.pattern) - self.write(f" as {node.name}") + self.write(f" as {_mangle_keyword(node.name)}") def visit_MatchOr(self, node): with self.require_parens(_Precedence.BOR, node): diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 45fad9d60fb622..35ed2cadcccea2 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -295,6 +295,14 @@ def test_unicode_mangled_keywords(self): self.check_ast_roundtrip('foo.𝕝𝕒𝕞𝕓𝕕𝕒') self.check_ast_roundtrip('lambda 𝕝𝕒𝕞𝕓𝕕𝕒: 1') self.check_ast_roundtrip('(𝕕𝕖𝕗 := 1)') + # `match` is parsed unusually, allowing ASCII keywords in many + # places. 
+ self.check_ast_roundtrip('''match match: + case [*case]: 1 + case {**case}: 1 + case 𝕔𝕝𝕒𝕤𝕤(case = 1): 1 + case case as 𝕒𝕤: 1''' + ) def test_bytes(self): self.check_ast_roundtrip("b'123'") From 1e8d5c9686178a1799895c77650385d99de34c26 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sat, 29 Jan 2022 12:57:59 -0500 Subject: [PATCH 5/6] Add blurb --- .../next/Library/2022-01-29-12-57-50.bpo-46520.38HC5x.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-01-29-12-57-50.bpo-46520.38HC5x.rst diff --git a/Misc/NEWS.d/next/Library/2022-01-29-12-57-50.bpo-46520.38HC5x.rst b/Misc/NEWS.d/next/Library/2022-01-29-12-57-50.bpo-46520.38HC5x.rst new file mode 100644 index 00000000000000..a5fc78531c83da --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-01-29-12-57-50.bpo-46520.38HC5x.rst @@ -0,0 +1,2 @@ +``ast.unparse`` can now handle the result of parsing code that uses +identifiers that NFKC-normalize to Python keywords, like "𝕕𝕖𝕗". From cce1bfd0f126bdfb53e88cda1601098312585cfb Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Sat, 29 Jan 2022 14:06:51 -0500 Subject: [PATCH 6/6] Use `import from` to avoid a name collision --- Lib/ast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/ast.py b/Lib/ast.py index bb6eebc64a77ed..ef5ccb03bd766c 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -28,7 +28,7 @@ from _ast import * from contextlib import contextmanager, nullcontext from enum import IntEnum, auto, _simple_enum -import keyword +from keyword import iskeyword def parse(source, filename='', mode='exec', *, @@ -677,7 +677,7 @@ def _mangle_keyword(x): won't be parsed as a keyword, as desired.""" return ( x if x in ('True', 'False', 'None') else - chr(ord(x[0]) + _MANGLE_INCR) + x[1:] if keyword.iskeyword(x) else + chr(ord(x[0]) + _MANGLE_INCR) + x[1:] if iskeyword(x) else x)