From 0cd61fc5f199bf4524920e9bf2aeac3169ee7f71 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 31 May 2017 23:27:40 +0300 Subject: [PATCH 1/2] bpo-30529: Fix errors for invalid whitespaces in f-string subexpressions. 'invalid character in identifier' now is raised instead of 'f-string: empty expression not allowed' if a subexpression contains only whitespaces and they are not accepted by Python parser. --- Lib/test/test_fstring.py | 4 ++++ Python/ast.c | 27 +++++---------------------- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 25730029ae76f1..7da716dd0fc14a 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -279,6 +279,7 @@ def test_missing_expression(self): "f'{ !r}'", "f'{10:{ }}'", "f' { } '", + "f'''{\t\x0c\r\n}'''", # Catch the empty expression before the # invalid conversion. @@ -300,6 +301,9 @@ def test_missing_expression(self): "f'{:x'", ]) + self.assertAllRaise(SyntaxError, 'invalid character in identifier', + ["f'''{\xa0}'''"]) + def test_parens_in_expressions(self): self.assertEqual(f'{3,}', '(3,)') diff --git a/Python/ast.c b/Python/ast.c index 205c711ac31c0f..c139df4828ab40 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -4274,49 +4274,32 @@ fstring_compile_expr(const char *expr_start, const char *expr_end, struct compiling *c, const node *n) { - int all_whitespace = 1; - int kind; - void *data; PyCompilerFlags cf; mod_ty mod; char *str; - PyObject *o; Py_ssize_t len; - Py_ssize_t i; + const char *s; assert(expr_end >= expr_start); assert(*(expr_start-1) == '{'); assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':'); - /* We know there are no escapes here, because backslashes are not allowed, - and we know it's utf-8 encoded (per PEP 263). But, in order to check - that each char is not whitespace, we need to decode it to unicode. - Which is unfortunate, but such is life. */ - /* If the substring is all whitespace, it's an error. We need to catch this here, and not when we call PyParser_ASTFromString, because turning the expression '' in to '()' would go from being invalid to valid. */ /* Note that this code says an empty string is all whitespace. That's important. There's a test for it: f'{}'. */ - o = PyUnicode_DecodeUTF8(expr_start, expr_end-expr_start, NULL); - if (o == NULL) - return NULL; - len = PyUnicode_GET_LENGTH(o); - kind = PyUnicode_KIND(o); - data = PyUnicode_DATA(o); - for (i = 0; i < len; i++) { - if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) { - all_whitespace = 0; + for (s = expr_start; s != expr_end; s++) { + char c = *s; + if (!(c == ' ' || c == '\t' || c == '\n' || c == '\014')) { break; } } - Py_DECREF(o); - if (all_whitespace) { + if (s == expr_end) { ast_error(c, n, "f-string: empty expression not allowed"); return NULL; } - /* Reuse len to be the length of the utf-8 input string. */ len = expr_end - expr_start; /* Allocate 3 extra bytes: open paren, close paren, null byte. */ str = PyMem_RawMalloc(len + 3); From b27d5fcc675248aea47d784275ba51b1d5dada72 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 8 Jun 2017 20:06:10 +0300 Subject: [PATCH 2/2] Address review comments. --- Lib/test/test_fstring.py | 10 ++++++++-- Python/ast.c | 6 +++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 7da716dd0fc14a..b39870457af492 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -279,7 +279,10 @@ def test_missing_expression(self): "f'{ !r}'", "f'{10:{ }}'", "f' { } '", - "f'''{\t\x0c\r\n}'''", + + # The Python parser ignores also the following + # whitespace characters in additional to a space. + "f'''{\t\f\r\n}'''", # Catch the empty expression before the # invalid conversion. @@ -301,8 +304,11 @@ def test_missing_expression(self): "f'{:x'", ]) + # Different error message is raised for other whitespace characters. self.assertAllRaise(SyntaxError, 'invalid character in identifier', - ["f'''{\xa0}'''"]) + ["f'''{\xa0}'''", + "\xa0", + ]) def test_parens_in_expressions(self): self.assertEqual(f'{3,}', '(3,)') diff --git a/Python/ast.c b/Python/ast.c index c139df4828ab40..7551b6f5655ea3 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -4287,11 +4287,11 @@ fstring_compile_expr(const char *expr_start, const char *expr_end, /* If the substring is all whitespace, it's an error. We need to catch this here, and not when we call PyParser_ASTFromString, because turning the expression '' in to '()' would go from being invalid to valid. */ - /* Note that this code says an empty string is all whitespace. That's - important. There's a test for it: f'{}'. */ for (s = expr_start; s != expr_end; s++) { char c = *s; - if (!(c == ' ' || c == '\t' || c == '\n' || c == '\014')) { + /* The Python parser ignores only the following whitespace + characters (\r already is converted to \n). */ + if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) { break; } }