From b6dca9ea6951cdb16f23de5eab844a6ecb670c2a Mon Sep 17 00:00:00 2001 From: Jakub Stasiak Date: Mon, 13 Feb 2017 02:07:56 +0100 Subject: [PATCH 1/2] bpo-27494: Fix 2to3 handling of trailing comma after a generator expression While this is a valid Python 2 and Python 3 syntax lib2to3 would choke on it: set(x for x in [],) This patch changes the grammar definition used by lib2to3 so that the actual Python syntax is supported now and backwards compatibility is preserved. --- Lib/lib2to3/Grammar.txt | 25 ++++++++++++++++--- Lib/lib2to3/fixer_util.py | 6 ++--- Lib/lib2to3/fixes/fix_dict.py | 2 +- Lib/lib2to3/fixes/fix_paren.py | 4 +-- Lib/lib2to3/fixes/fix_xrange.py | 2 +- Lib/lib2to3/tests/test_parser.py | 7 ++++++ .../2017-09-26-01-43-17.bpo-27494.37QnaT.rst | 2 ++ 7 files changed, 38 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-09-26-01-43-17.bpo-27494.37QnaT.rst diff --git a/Lib/lib2to3/Grammar.txt b/Lib/lib2to3/Grammar.txt index 2abd5ee65b5bbd..4ff03c7864a3f0 100644 --- a/Lib/lib2to3/Grammar.txt +++ b/Lib/lib2to3/Grammar.txt @@ -130,8 +130,8 @@ atom: ('(' [yield_expr|testlist_gexp] ')' | '{' [dictsetmaker] '}' | '`' testlist1 '`' | NAME | NUMBER | STRING+ | '.' '.' '.') -listmaker: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) -testlist_gexp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +listmaker: (test|star_expr) ( old_comp_for | (',' (test|star_expr))* [','] ) +testlist_gexp: (test|star_expr) ( old_comp_for | (',' (test|star_expr))* [','] ) lambdef: 'lambda' [varargslist] ':' test trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME subscriptlist: subscript (',' subscript)* [','] @@ -161,9 +161,28 @@ argument: ( test [comp_for] | star_expr ) comp_iter: comp_for | comp_if -comp_for: [ASYNC] 'for' exprlist 'in' testlist_safe [comp_iter] +comp_for: [ASYNC] 'for' exprlist 'in' or_test [comp_iter] comp_if: 'if' old_test [comp_iter] +# See note above regarding testlist_safe to see why testlist_safe is +# necessary. Unfortunately we can't use it indiscriminately in all derivations +# using comp_for-like pattern because testlist_safe derivation contains +# comma which clashes with trailing comma in arglist. +# +# The above was an issue because the parser would not follow the correct derivation +# when parsing syntactically valid Python code. Since testlist_safe was created +# specifically to handle list comprehensions and generator expressions enclosed +# with parentheses it's safe to only use it in those. That avoids the issue +# mentioned above and we can parse code like set(x for x in [],). +# +# Note that the syntax supported by this set of rules is not a valid Python 3 +# syntax, hence the prefix "old". +# +# See the bug report: http://bugs.python.org/issue27494 +old_comp_iter: old_comp_for | old_comp_if +old_comp_for: [ASYNC] 'for' exprlist 'in' testlist_safe [old_comp_iter] +old_comp_if: 'if' old_test [old_comp_iter] + testlist1: test (',' test)* # not used in grammar, but may appear in "node" passed from Parser to Compiler diff --git a/Lib/lib2to3/fixer_util.py b/Lib/lib2to3/fixer_util.py index babe6cb3f662a9..2f9b1c2e766e78 100644 --- a/Lib/lib2to3/fixer_util.py +++ b/Lib/lib2to3/fixer_util.py @@ -101,8 +101,8 @@ def ListComp(xp, fp, it, test=None): test.prefix = " " if_leaf = Leaf(token.NAME, "if") if_leaf.prefix = " " - inner_args.append(Node(syms.comp_if, [if_leaf, test])) - inner = Node(syms.listmaker, [xp, Node(syms.comp_for, inner_args)]) + inner_args.append(Node(syms.old_comp_if, [if_leaf, test])) + inner = Node(syms.listmaker, [xp, Node(syms.old_comp_for, inner_args)]) return Node(syms.atom, [Leaf(token.LBRACE, "["), inner, @@ -208,7 +208,7 @@ def attr_chain(obj, attr): next = getattr(next, attr) p0 = """for_stmt< 'for' any 'in' node=any ':' any* > - | comp_for< 'for' any 'in' node=any any* > + | old_comp_for< 'for' any 'in' node=any any* > """ p1 = """ power< diff --git a/Lib/lib2to3/fixes/fix_dict.py b/Lib/lib2to3/fixes/fix_dict.py index d3655c9f1b2d9b..55be553baac81d 100644 --- a/Lib/lib2to3/fixes/fix_dict.py +++ b/Lib/lib2to3/fixes/fix_dict.py @@ -83,7 +83,7 @@ def transform(self, node, results): p1 = patcomp.compile_pattern(P1) P2 = """for_stmt< 'for' any 'in' node=any ':' any* > - | comp_for< 'for' any 'in' node=any any* > + | old_comp_for< 'for' any 'in' node=any any* > """ p2 = patcomp.compile_pattern(P2) diff --git a/Lib/lib2to3/fixes/fix_paren.py b/Lib/lib2to3/fixes/fix_paren.py index b205aa7e1e93fb..de49eef157d586 100644 --- a/Lib/lib2to3/fixes/fix_paren.py +++ b/Lib/lib2to3/fixes/fix_paren.py @@ -15,7 +15,7 @@ class FixParen(fixer_base.BaseFix): PATTERN = """ atom< ('[' | '(') (listmaker< any - comp_for< + old_comp_for< 'for' NAME 'in' target=testlist_safe< any (',' any)+ [','] > @@ -24,7 +24,7 @@ class FixParen(fixer_base.BaseFix): > | testlist_gexp< any - comp_for< + old_comp_for< 'for' NAME 'in' target=testlist_safe< any (',' any)+ [','] > diff --git a/Lib/lib2to3/fixes/fix_xrange.py b/Lib/lib2to3/fixes/fix_xrange.py index 1e491e166a3f1c..f5f06f3543502f 100644 --- a/Lib/lib2to3/fixes/fix_xrange.py +++ b/Lib/lib2to3/fixes/fix_xrange.py @@ -55,7 +55,7 @@ def transform_range(self, node, results): p1 = patcomp.compile_pattern(P1) P2 = """for_stmt< 'for' any 'in' node=any ':' any* > - | comp_for< 'for' any 'in' node=any any* > + | old_comp_for< 'for' any 'in' node=any any* > | comparison< any 'in' node=any any*> """ p2 = patcomp.compile_pattern(P2) diff --git a/Lib/lib2to3/tests/test_parser.py b/Lib/lib2to3/tests/test_parser.py index 3f7ab9714e38f9..2efcb80c2f94a9 100644 --- a/Lib/lib2to3/tests/test_parser.py +++ b/Lib/lib2to3/tests/test_parser.py @@ -459,6 +459,13 @@ def test_multiline_str_literals(self): self.validate(s) +class TestGeneratorExpressions(GrammarTest): + + def test_trailing_comma_after_generator_expression_argument_works(self): + # BPO issue 27494 + self.validate("set(x for x in [],)") + + def diff(fn, result): try: with open('@', 'w') as f: diff --git a/Misc/NEWS.d/next/Library/2017-09-26-01-43-17.bpo-27494.37QnaT.rst b/Misc/NEWS.d/next/Library/2017-09-26-01-43-17.bpo-27494.37QnaT.rst new file mode 100644 index 00000000000000..d35d4b9944bc84 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-09-26-01-43-17.bpo-27494.37QnaT.rst @@ -0,0 +1,2 @@ +2to3 doesn't choke on a trailing comma following a generator expression +anymore. From 07560411f34dfd464b1d3b0874401288c1a8b60c Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Wed, 4 Oct 2017 23:49:56 -0700 Subject: [PATCH 2/2] reword --- Lib/lib2to3/Grammar.txt | 20 +++++++++---------- .../2017-09-26-01-43-17.bpo-27494.37QnaT.rst | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Lib/lib2to3/Grammar.txt b/Lib/lib2to3/Grammar.txt index 4ff03c7864a3f0..ded032522bada4 100644 --- a/Lib/lib2to3/Grammar.txt +++ b/Lib/lib2to3/Grammar.txt @@ -164,21 +164,21 @@ comp_iter: comp_for | comp_if comp_for: [ASYNC] 'for' exprlist 'in' or_test [comp_iter] comp_if: 'if' old_test [comp_iter] -# See note above regarding testlist_safe to see why testlist_safe is -# necessary. Unfortunately we can't use it indiscriminately in all derivations -# using comp_for-like pattern because testlist_safe derivation contains -# comma which clashes with trailing comma in arglist. +# As noted above, testlist_safe extends the syntax allowed in list +# comprehensions and generators. We can't use it indiscriminately in all +# derivations using a comp_for-like pattern because the testlist_safe derivation +# contains comma which clashes with trailing comma in arglist. # -# The above was an issue because the parser would not follow the correct derivation +# This was an issue because the parser would not follow the correct derivation # when parsing syntactically valid Python code. Since testlist_safe was created # specifically to handle list comprehensions and generator expressions enclosed -# with parentheses it's safe to only use it in those. That avoids the issue -# mentioned above and we can parse code like set(x for x in [],). +# with parentheses, it's safe to only use it in those. That avoids the issue; we +# can parse code like set(x for x in [],). # -# Note that the syntax supported by this set of rules is not a valid Python 3 -# syntax, hence the prefix "old". +# The syntax supported by this set of rules is not a valid Python 3 syntax, +# hence the prefix "old". # -# See the bug report: http://bugs.python.org/issue27494 +# See https://bugs.python.org/issue27494 old_comp_iter: old_comp_for | old_comp_if old_comp_for: [ASYNC] 'for' exprlist 'in' testlist_safe [old_comp_iter] old_comp_if: 'if' old_test [old_comp_iter] diff --git a/Misc/NEWS.d/next/Library/2017-09-26-01-43-17.bpo-27494.37QnaT.rst b/Misc/NEWS.d/next/Library/2017-09-26-01-43-17.bpo-27494.37QnaT.rst index d35d4b9944bc84..5b5362a41c5eb2 100644 --- a/Misc/NEWS.d/next/Library/2017-09-26-01-43-17.bpo-27494.37QnaT.rst +++ b/Misc/NEWS.d/next/Library/2017-09-26-01-43-17.bpo-27494.37QnaT.rst @@ -1,2 +1,2 @@ -2to3 doesn't choke on a trailing comma following a generator expression -anymore. +Make 2to3 accept a trailing comma in generator expressions. For example, ``set(x +for x in [],)`` is now allowed.