Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit a993e90

Browse filesBrowse files
bpo-28002: Roundtrip f-strings with ast.unparse better (#19612)
By attempting to avoid backslashes in f-string expressions. We also now proactively raise errors for some backslashes we can't avoid while unparsing FormattedValues Co-authored-by: hauntsaninja <> Co-authored-by: Shantanu <hauntsaninja@users.noreply.github.com> Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
1 parent 9fc319d commit a993e90
Copy full SHA for a993e90

File tree

2 files changed

+115
-37
lines changed
Filter options

2 files changed

+115
-37
lines changed

‎Lib/ast.py

Copy file name to clipboardExpand all lines: Lib/ast.py
+86-24Lines changed: 86 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -662,17 +662,23 @@ def next(self):
662662
except ValueError:
663663
return self
664664

665+
666+
_SINGLE_QUOTES = ("'", '"')
667+
_MULTI_QUOTES = ('"""', "'''")
668+
_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES)
669+
665670
class _Unparser(NodeVisitor):
666671
"""Methods in this class recursively traverse an AST and
667672
output source code for the abstract syntax; original formatting
668673
is disregarded."""
669674

670-
def __init__(self):
675+
def __init__(self, *, _avoid_backslashes=False):
671676
self._source = []
672677
self._buffer = []
673678
self._precedences = {}
674679
self._type_ignores = {}
675680
self._indent = 0
681+
self._avoid_backslashes = _avoid_backslashes
676682

677683
def interleave(self, inter, f, seq):
678684
"""Call f on each item in seq, calling inter() in between."""
@@ -1067,15 +1073,85 @@ def visit_AsyncWith(self, node):
10671073
with self.block(extra=self.get_type_comment(node)):
10681074
self.traverse(node.body)
10691075

1076+
def _str_literal_helper(
1077+
self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False
1078+
):
1079+
"""Helper for writing string literals, minimizing escapes.
1080+
Returns the tuple (string literal to write, possible quote types).
1081+
"""
1082+
def escape_char(c):
1083+
# \n and \t are non-printable, but we only escape them if
1084+
# escape_special_whitespace is True
1085+
if not escape_special_whitespace and c in "\n\t":
1086+
return c
1087+
# Always escape backslashes and other non-printable characters
1088+
if c == "\\" or not c.isprintable():
1089+
return c.encode("unicode_escape").decode("ascii")
1090+
return c
1091+
1092+
escaped_string = "".join(map(escape_char, string))
1093+
possible_quotes = quote_types
1094+
if "\n" in escaped_string:
1095+
possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES]
1096+
possible_quotes = [q for q in possible_quotes if q not in escaped_string]
1097+
if not possible_quotes:
1098+
# If there aren't any possible_quotes, fallback to using repr
1099+
# on the original string. Try to use a quote from quote_types,
1100+
# e.g., so that we use triple quotes for docstrings.
1101+
string = repr(string)
1102+
quote = next((q for q in quote_types if string[0] in q), string[0])
1103+
return string[1:-1], [quote]
1104+
if escaped_string:
1105+
# Sort so that we prefer '''"''' over """\""""
1106+
possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1])
1107+
# If we're using triple quotes and we'd need to escape a final
1108+
# quote, escape it
1109+
if possible_quotes[0][0] == escaped_string[-1]:
1110+
assert len(possible_quotes[0]) == 3
1111+
escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1]
1112+
return escaped_string, possible_quotes
1113+
1114+
def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES):
1115+
"""Write string literal value with a best effort attempt to avoid backslashes."""
1116+
string, quote_types = self._str_literal_helper(string, quote_types=quote_types)
1117+
quote_type = quote_types[0]
1118+
self.write(f"{quote_type}{string}{quote_type}")
1119+
10701120
def visit_JoinedStr(self, node):
10711121
self.write("f")
1072-
self._fstring_JoinedStr(node, self.buffer_writer)
1073-
self.write(repr(self.buffer))
1122+
if self._avoid_backslashes:
1123+
self._fstring_JoinedStr(node, self.buffer_writer)
1124+
self._write_str_avoiding_backslashes(self.buffer)
1125+
return
1126+
1127+
# If we don't need to avoid backslashes globally (i.e., we only need
1128+
# to avoid them inside FormattedValues), it's cosmetically preferred
1129+
# to use escaped whitespace. That is, it's preferred to use backslashes
1130+
# for cases like: f"{x}\n". To accomplish this, we keep track of what
1131+
# in our buffer corresponds to FormattedValues and what corresponds to
1132+
# Constant parts of the f-string, and allow escapes accordingly.
1133+
buffer = []
1134+
for value in node.values:
1135+
meth = getattr(self, "_fstring_" + type(value).__name__)
1136+
meth(value, self.buffer_writer)
1137+
buffer.append((self.buffer, isinstance(value, Constant)))
1138+
new_buffer = []
1139+
quote_types = _ALL_QUOTES
1140+
for value, is_constant in buffer:
1141+
# Repeatedly narrow down the list of possible quote_types
1142+
value, quote_types = self._str_literal_helper(
1143+
value, quote_types=quote_types,
1144+
escape_special_whitespace=is_constant
1145+
)
1146+
new_buffer.append(value)
1147+
value = "".join(new_buffer)
1148+
quote_type = quote_types[0]
1149+
self.write(f"{quote_type}{value}{quote_type}")
10741150

10751151
def visit_FormattedValue(self, node):
10761152
self.write("f")
10771153
self._fstring_FormattedValue(node, self.buffer_writer)
1078-
self.write(repr(self.buffer))
1154+
self._write_str_avoiding_backslashes(self.buffer)
10791155

10801156
def _fstring_JoinedStr(self, node, write):
10811157
for value in node.values:
@@ -1090,11 +1166,13 @@ def _fstring_Constant(self, node, write):
10901166

10911167
def _fstring_FormattedValue(self, node, write):
10921168
write("{")
1093-
unparser = type(self)()
1169+
unparser = type(self)(_avoid_backslashes=True)
10941170
unparser.set_precedence(_Precedence.TEST.next(), node.value)
10951171
expr = unparser.visit(node.value)
10961172
if expr.startswith("{"):
10971173
write(" ") # Separate pair of opening brackets as "{ {"
1174+
if "\\" in expr:
1175+
raise ValueError("Unable to avoid backslash in f-string expression part")
10981176
write(expr)
10991177
if node.conversion != -1:
11001178
conversion = chr(node.conversion)
@@ -1111,33 +1189,17 @@ def visit_Name(self, node):
11111189
self.write(node.id)
11121190

11131191
def _write_docstring(self, node):
1114-
def esc_char(c):
1115-
if c in ("\n", "\t"):
1116-
# In the AST form, we don't know the author's intentation
1117-
# about how this should be displayed. We'll only escape
1118-
# \n and \t, because they are more likely to be unescaped
1119-
# in the source
1120-
return c
1121-
return c.encode('unicode_escape').decode('ascii')
1122-
11231192
self.fill()
11241193
if node.kind == "u":
11251194
self.write("u")
1126-
1127-
value = node.value
1128-
if value:
1129-
# Preserve quotes in the docstring by escaping them
1130-
value = "".join(map(esc_char, value))
1131-
if value[-1] == '"':
1132-
value = value.replace('"', '\\"', -1)
1133-
value = value.replace('"""', '""\\"')
1134-
1135-
self.write(f'"""{value}"""')
1195+
self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES)
11361196

11371197
def _write_constant(self, value):
11381198
if isinstance(value, (float, complex)):
11391199
# Substitute overflowing decimal literal for AST infinities.
11401200
self.write(repr(value).replace("inf", _INFSTR))
1201+
elif self._avoid_backslashes and isinstance(value, str):
1202+
self._write_str_avoiding_backslashes(value)
11411203
else:
11421204
self.write(repr(value))
11431205

‎Lib/test/test_unparse.py

Copy file name to clipboardExpand all lines: Lib/test/test_unparse.py
+29-13Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,18 @@ def test_fstrings(self):
152152
# See issue 25180
153153
self.check_ast_roundtrip(r"""f'{f"{0}"*3}'""")
154154
self.check_ast_roundtrip(r"""f'{f"{y}"*3}'""")
155+
self.check_ast_roundtrip("""f''""")
156+
self.check_ast_roundtrip('''f"""'end' "quote\\""""''')
157+
158+
def test_fstrings_complicated(self):
159+
# See issue 28002
160+
self.check_ast_roundtrip("""f'''{"'"}'''""")
161+
self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''')
162+
self.check_ast_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'single quote\\'\'\'\'''')
163+
self.check_ast_roundtrip('f"""{\'\'\'\n\'\'\'}"""')
164+
self.check_ast_roundtrip('f"""{g(\'\'\'\n\'\'\')}"""')
165+
self.check_ast_roundtrip('''f"a\\r\\nb"''')
166+
self.check_ast_roundtrip('''f"\\u2028{'x'}"''')
155167

156168
def test_strings(self):
157169
self.check_ast_roundtrip("u'foo'")
@@ -311,6 +323,9 @@ def test_invalid_fstring_conversion(self):
311323
)
312324
)
313325

326+
def test_invalid_fstring_backslash(self):
327+
self.check_invalid(ast.FormattedValue(value=ast.Constant(value="\\\\")))
328+
314329
def test_invalid_set(self):
315330
self.check_invalid(ast.Set(elts=[]))
316331

@@ -330,8 +345,8 @@ def test_docstrings(self):
330345
'\r\\r\t\\t\n\\n',
331346
'""">>> content = \"\"\"blabla\"\"\" <<<"""',
332347
r'foo\n\x00',
333-
'🐍⛎𩸽üéş^\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
334-
348+
"' \\'\\'\\'\"\"\" \"\"\\'\\' \\'",
349+
'🐍⛎𩸽üéş^\\\\X\\\\BB\N{LONG RIGHTWARDS SQUIGGLE ARROW}'
335350
)
336351
for docstring in docstrings:
337352
# check as Module docstrings for easy testing
@@ -416,7 +431,6 @@ def test_simple_expressions_parens(self):
416431
self.check_src_roundtrip("call((yield x))")
417432
self.check_src_roundtrip("return x + (yield x)")
418433

419-
420434
def test_class_bases_and_keywords(self):
421435
self.check_src_roundtrip("class X:\n pass")
422436
self.check_src_roundtrip("class X(A):\n pass")
@@ -429,6 +443,13 @@ def test_class_bases_and_keywords(self):
429443
self.check_src_roundtrip("class X(*args):\n pass")
430444
self.check_src_roundtrip("class X(*args, **kwargs):\n pass")
431445

446+
def test_fstrings(self):
447+
self.check_src_roundtrip('''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-\'\'\'''')
448+
self.check_src_roundtrip('''f"\\u2028{'x'}"''')
449+
self.check_src_roundtrip(r"f'{x}\n'")
450+
self.check_src_roundtrip('''f''\'{"""\n"""}\\n''\'''')
451+
self.check_src_roundtrip('''f''\'{f"""{x}\n"""}\\n''\'''')
452+
432453
def test_docstrings(self):
433454
docstrings = (
434455
'"""simple doc string"""',
@@ -443,6 +464,10 @@ def test_docstrings(self):
443464
'""""""',
444465
'"""\'\'\'"""',
445466
'"""\'\'\'\'\'\'"""',
467+
'"""🐍⛎𩸽üéş^\\\\X\\\\BB⟿"""',
468+
'"""end in single \'quote\'"""',
469+
"'''end in double \"quote\"'''",
470+
'"""almost end in double "quote"."""',
446471
)
447472

448473
for prefix in docstring_prefixes:
@@ -483,9 +508,8 @@ class DirectoryTestCase(ASTTestCase):
483508

484509
lib_dir = pathlib.Path(__file__).parent / ".."
485510
test_directories = (lib_dir, lib_dir / "test")
486-
skip_files = {"test_fstring.py"}
487511
run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
488-
"test_ast.py", "test_asdl_parser.py"}
512+
"test_ast.py", "test_asdl_parser.py", "test_fstring.py"}
489513

490514
_files_to_test = None
491515

@@ -525,14 +549,6 @@ def test_files(self):
525549
if test.support.verbose:
526550
print(f"Testing {item.absolute()}")
527551

528-
# Some f-strings are not correctly round-tripped by
529-
# Tools/parser/unparse.py. See issue 28002 for details.
530-
# We need to skip files that contain such f-strings.
531-
if item.name in self.skip_files:
532-
if test.support.verbose:
533-
print(f"Skipping {item.absolute()}: see issue 28002")
534-
continue
535-
536552
with self.subTest(filename=item):
537553
source = read_pyfile(item)
538554
self.check_ast_roundtrip(source)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.