From 1b61a5e3c65ed8d505958f955cba063762dd57b5 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 30 Apr 2022 18:37:32 -0600 Subject: [PATCH 01/12] Tagged String syntax -- RAISES SYNTAX ERROR --- Grammar/python.gram | 1 + Include/internal/pycore_ast.h | 14 +++- Include/internal/pycore_ast_state.h | 2 + Parser/Python.asdl | 1 + Parser/action_helpers.c | 13 ++++ Parser/parser.c | 30 ++++++++ Parser/pegen.h | 1 + Python/Python-ast.c | 106 ++++++++++++++++++++++++++++ Python/ast.c | 4 ++ Python/compile.c | 5 ++ 10 files changed, 174 insertions(+), 3 deletions(-) diff --git a/Grammar/python.gram b/Grammar/python.gram index 15c40b6bbbacdc..c2eb667f06f30a 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -796,6 +796,7 @@ slice[expr_ty]: | a=named_expression { a } atom[expr_ty]: + | a=NAME &STRING b=strings { _PyPegen_tag_string(p, a, b) } | NAME | 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) } | 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) } diff --git a/Include/internal/pycore_ast.h b/Include/internal/pycore_ast.h index 36277efe9c5ca5..c36fbc1f13b2a4 100644 --- a/Include/internal/pycore_ast.h +++ b/Include/internal/pycore_ast.h @@ -341,9 +341,10 @@ enum _expr_kind {BoolOp_kind=1, NamedExpr_kind=2, BinOp_kind=3, UnaryOp_kind=4, ListComp_kind=9, SetComp_kind=10, DictComp_kind=11, GeneratorExp_kind=12, Await_kind=13, Yield_kind=14, YieldFrom_kind=15, Compare_kind=16, Call_kind=17, - FormattedValue_kind=18, JoinedStr_kind=19, Constant_kind=20, - Attribute_kind=21, Subscript_kind=22, Starred_kind=23, - Name_kind=24, List_kind=25, Tuple_kind=26, Slice_kind=27}; + FormattedValue_kind=18, JoinedStr_kind=19, TagString_kind=20, + Constant_kind=21, Attribute_kind=22, Subscript_kind=23, + Starred_kind=24, Name_kind=25, List_kind=26, Tuple_kind=27, + Slice_kind=28}; struct _expr { enum _expr_kind kind; union { @@ -443,6 +444,11 @@ struct _expr { asdl_expr_seq *values; } JoinedStr; + struct { + expr_ty tag; + expr_ty str; + } TagString; + struct { constant 
value; string kind; @@ -771,6 +777,8 @@ expr_ty _PyAST_FormattedValue(expr_ty value, int conversion, expr_ty end_lineno, int end_col_offset, PyArena *arena); expr_ty _PyAST_JoinedStr(asdl_expr_seq * values, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena); +expr_ty _PyAST_TagString(expr_ty tag, expr_ty str, int lineno, int col_offset, + int end_lineno, int end_col_offset, PyArena *arena); expr_ty _PyAST_Constant(constant value, string kind, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena); diff --git a/Include/internal/pycore_ast_state.h b/Include/internal/pycore_ast_state.h index da78bba3b69bdf..41954346e607da 100644 --- a/Include/internal/pycore_ast_state.h +++ b/Include/internal/pycore_ast_state.h @@ -132,6 +132,7 @@ struct ast_state { PyObject *Sub_singleton; PyObject *Sub_type; PyObject *Subscript_type; + PyObject *TagString_type; PyObject *TryStar_type; PyObject *Try_type; PyObject *Tuple_type; @@ -232,6 +233,7 @@ struct ast_state { PyObject *slice; PyObject *step; PyObject *stmt_type; + PyObject *str; PyObject *subject; PyObject *tag; PyObject *target; diff --git a/Parser/Python.asdl b/Parser/Python.asdl index e9423a7c984f21..9136144cc090d4 100644 --- a/Parser/Python.asdl +++ b/Parser/Python.asdl @@ -77,6 +77,7 @@ module Python | Call(expr func, expr* args, keyword* keywords) | FormattedValue(expr value, int conversion, expr? format_spec) | JoinedStr(expr* values) + | TagString(expr tag, expr str) | Constant(constant value, string? 
kind) -- the following expression can appear in assignment context diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index d1be679aff2e7b..37312ea11c2be1 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -869,6 +869,19 @@ _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs) return new_seq; } +expr_ty +_PyPegen_tag_string(Parser *p, expr_ty tag, expr_ty str) +{ + // No prefixes (f, r, b, u) + // Parse like fstring + // Create a node similar to f-string AST + return _PyAST_TagString(tag, str, + tag->lineno, tag->col_offset, str->end_lineno, str->end_col_offset, + p->arena); + +} + + expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *strings) { diff --git a/Parser/parser.c b/Parser/parser.c index adc8d509eb7d7d..ad1196e8a213d1 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -14111,6 +14111,7 @@ slice_rule(Parser *p) } // atom: +// | NAME &STRING strings // | NAME // | 'True' // | 'False' @@ -14143,6 +14144,35 @@ atom_rule(Parser *p) UNUSED(_start_lineno); // Only used by EXTRA macro int _start_col_offset = p->tokens[_mark]->col_offset; UNUSED(_start_col_offset); // Only used by EXTRA macro + { // NAME &STRING strings + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME &STRING strings")); + expr_ty a; + expr_ty b; + if ( + (a = _PyPegen_name_token(p)) // NAME + && + _PyPegen_lookahead(1, _PyPegen_string_token, p) + && + (b = strings_rule(p)) // strings + ) + { + D(fprintf(stderr, "%*c+ atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME &STRING strings")); + _res = _PyPegen_tag_string ( p , a , b ); + if (_res == NULL && PyErr_Occurred()) { + p->error_indicator = 1; + p->level--; + return NULL; + } + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s atom[%d-%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" 
: "-", _mark, p->mark, "NAME &STRING strings")); + } { // NAME if (p->error_indicator) { p->level--; diff --git a/Parser/pegen.h b/Parser/pegen.h index fe0c327b875566..31340e46dc821c 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -317,6 +317,7 @@ expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena); expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *); +expr_ty _PyPegen_tag_string(Parser *p, expr_ty, expr_ty); expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty); expr_ty _PyPegen_ensure_real(Parser *p, expr_ty); asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *); diff --git a/Python/Python-ast.c b/Python/Python-ast.c index da79463375a1a1..7863066fa73c2b 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -146,6 +146,7 @@ void _PyAST_Fini(PyInterpreterState *interp) Py_CLEAR(state->Sub_singleton); Py_CLEAR(state->Sub_type); Py_CLEAR(state->Subscript_type); + Py_CLEAR(state->TagString_type); Py_CLEAR(state->TryStar_type); Py_CLEAR(state->Try_type); Py_CLEAR(state->Tuple_type); @@ -246,6 +247,7 @@ void _PyAST_Fini(PyInterpreterState *interp) Py_CLEAR(state->slice); Py_CLEAR(state->step); Py_CLEAR(state->stmt_type); + Py_CLEAR(state->str); Py_CLEAR(state->subject); Py_CLEAR(state->tag); Py_CLEAR(state->target); @@ -343,6 +345,7 @@ static int init_identifiers(struct ast_state *state) if ((state->simple = PyUnicode_InternFromString("simple")) == NULL) return 0; if ((state->slice = PyUnicode_InternFromString("slice")) == NULL) return 0; if ((state->step = PyUnicode_InternFromString("step")) == NULL) return 0; + if ((state->str = PyUnicode_InternFromString("str")) == NULL) return 0; if ((state->subject = PyUnicode_InternFromString("subject")) == NULL) return 0; if ((state->tag = PyUnicode_InternFromString("tag")) == NULL) return 0; if ((state->target = PyUnicode_InternFromString("target")) == NULL) return 0; @@ -598,6 +601,10 @@ 
static const char * const FormattedValue_fields[]={ static const char * const JoinedStr_fields[]={ "values", }; +static const char * const TagString_fields[]={ + "tag", + "str", +}; static const char * const Constant_fields[]={ "value", "kind", @@ -1326,6 +1333,7 @@ init_types(struct ast_state *state) " | Call(expr func, expr* args, keyword* keywords)\n" " | FormattedValue(expr value, int conversion, expr? format_spec)\n" " | JoinedStr(expr* values)\n" + " | TagString(expr tag, expr str)\n" " | Constant(constant value, string? kind)\n" " | Attribute(expr value, identifier attr, expr_context ctx)\n" " | Subscript(expr value, expr slice, expr_context ctx)\n" @@ -1423,6 +1431,10 @@ init_types(struct ast_state *state) JoinedStr_fields, 1, "JoinedStr(expr* values)"); if (!state->JoinedStr_type) return 0; + state->TagString_type = make_type(state, "TagString", state->expr_type, + TagString_fields, 2, + "TagString(expr tag, expr str)"); + if (!state->TagString_type) return 0; state->Constant_type = make_type(state, "Constant", state->expr_type, Constant_fields, 2, "Constant(constant value, string? 
kind)"); @@ -3039,6 +3051,34 @@ _PyAST_JoinedStr(asdl_expr_seq * values, int lineno, int col_offset, int return p; } +expr_ty +_PyAST_TagString(expr_ty tag, expr_ty str, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena) +{ + expr_ty p; + if (!tag) { + PyErr_SetString(PyExc_ValueError, + "field 'tag' is required for TagString"); + return NULL; + } + if (!str) { + PyErr_SetString(PyExc_ValueError, + "field 'str' is required for TagString"); + return NULL; + } + p = (expr_ty)_PyArena_Malloc(arena, sizeof(*p)); + if (!p) + return NULL; + p->kind = TagString_kind; + p->v.TagString.tag = tag; + p->v.TagString.str = str; + p->lineno = lineno; + p->col_offset = col_offset; + p->end_lineno = end_lineno; + p->end_col_offset = end_col_offset; + return p; +} + expr_ty _PyAST_Constant(constant value, string kind, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena) @@ -4544,6 +4584,21 @@ ast2obj_expr(struct ast_state *state, void* _o) goto failed; Py_DECREF(value); break; + case TagString_kind: + tp = (PyTypeObject *)state->TagString_type; + result = PyType_GenericNew(tp, NULL, NULL); + if (!result) goto failed; + value = ast2obj_expr(state, o->v.TagString.tag); + if (!value) goto failed; + if (PyObject_SetAttr(result, state->tag, value) == -1) + goto failed; + Py_DECREF(value); + value = ast2obj_expr(state, o->v.TagString.str); + if (!value) goto failed; + if (PyObject_SetAttr(result, state->str, value) == -1) + goto failed; + Py_DECREF(value); + break; case Constant_kind: tp = (PyTypeObject *)state->Constant_type; result = PyType_GenericNew(tp, NULL, NULL); @@ -9333,6 +9388,54 @@ obj2ast_expr(struct ast_state *state, PyObject* obj, expr_ty* out, PyArena* if (*out == NULL) goto failed; return 0; } + tp = state->TagString_type; + isinstance = PyObject_IsInstance(obj, tp); + if (isinstance == -1) { + return 1; + } + if (isinstance) { + expr_ty tag; + expr_ty str; + + if (_PyObject_LookupAttr(obj, state->tag, &tmp) < 0) { 
+ return 1; + } + if (tmp == NULL) { + PyErr_SetString(PyExc_TypeError, "required field \"tag\" missing from TagString"); + return 1; + } + else { + int res; + if (Py_EnterRecursiveCall(" while traversing 'TagString' node")) { + goto failed; + } + res = obj2ast_expr(state, tmp, &tag, arena); + Py_LeaveRecursiveCall(); + if (res != 0) goto failed; + Py_CLEAR(tmp); + } + if (_PyObject_LookupAttr(obj, state->str, &tmp) < 0) { + return 1; + } + if (tmp == NULL) { + PyErr_SetString(PyExc_TypeError, "required field \"str\" missing from TagString"); + return 1; + } + else { + int res; + if (Py_EnterRecursiveCall(" while traversing 'TagString' node")) { + goto failed; + } + res = obj2ast_expr(state, tmp, &str, arena); + Py_LeaveRecursiveCall(); + if (res != 0) goto failed; + Py_CLEAR(tmp); + } + *out = _PyAST_TagString(tag, str, lineno, col_offset, end_lineno, + end_col_offset, arena); + if (*out == NULL) goto failed; + return 0; + } tp = state->Constant_type; isinstance = PyObject_IsInstance(obj, tp); if (isinstance == -1) { @@ -12002,6 +12105,9 @@ astmodule_exec(PyObject *m) if (PyModule_AddObjectRef(m, "JoinedStr", state->JoinedStr_type) < 0) { return -1; } + if (PyModule_AddObjectRef(m, "TagString", state->TagString_type) < 0) { + return -1; + } if (PyModule_AddObjectRef(m, "Constant", state->Constant_type) < 0) { return -1; } diff --git a/Python/ast.c b/Python/ast.c index 607281e2685535..1217ddde240d5c 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -321,6 +321,10 @@ validate_expr(struct validator *state, expr_ty exp, expr_context_ty ctx) case JoinedStr_kind: ret = validate_exprs(state, exp->v.JoinedStr.values, Load, 0); break; + case TagString_kind: + ret = validate_expr(state, exp->v.TagString.tag, Load) && + validate_expr(state, exp->v.TagString.str, Load); + break; case FormattedValue_kind: if (validate_expr(state, exp->v.FormattedValue.value, Load) == 0) return 0; diff --git a/Python/compile.c b/Python/compile.c index 10d6307a48406f..04665f4a708677 100644 --- 
a/Python/compile.c +++ b/Python/compile.c @@ -4691,6 +4691,7 @@ check_caller(struct compiler *c, expr_ty e) case SetComp_kind: case GeneratorExp_kind: case JoinedStr_kind: + case TagString_kind: case FormattedValue_kind: return compiler_warn(c, "'%.200s' object is not callable; " "perhaps you missed a comma?", @@ -4750,6 +4751,7 @@ check_index(struct compiler *c, expr_ty e, expr_ty s) case List_kind: case ListComp_kind: case JoinedStr_kind: + case TagString_kind: case FormattedValue_kind: return compiler_warn(c, "%.200s indices must be integers or slices, " "not %.200s; " @@ -5835,6 +5837,9 @@ compiler_visit_expr1(struct compiler *c, expr_ty e) break; case JoinedStr_kind: return compiler_joined_str(c, e); + case TagString_kind: + return compiler_error(c, "TagString not yet supported"); + break; case FormattedValue_kind: return compiler_formatted_value(c, e); /* The following exprs can be assignment targets. */ From a17f33606dad717e0940688e069b2787c2e236f4 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 1 May 2022 18:05:40 -0600 Subject: [PATCH 02/12] Some progress -- translate foo'bar' into foo('bar') --- Python/ast_opt.c | 4 ++++ Python/ast_unparse.c | 2 ++ Python/compile.c | 35 ++++++++++++++++++++++++++++++++--- Python/symtable.c | 4 ++++ 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/Python/ast_opt.c b/Python/ast_opt.c index b1d807bcf10ae1..c6f636c3a88cd5 100644 --- a/Python/ast_opt.c +++ b/Python/ast_opt.c @@ -782,6 +782,10 @@ astfold_expr(expr_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) case JoinedStr_kind: CALL_SEQ(astfold_expr, expr, node_->v.JoinedStr.values); break; + case TagString_kind: + CALL(astfold_expr, expr_ty, node_->v.TagString.tag); + CALL(astfold_expr, expr_ty, node_->v.TagString.str); + break; case Attribute_kind: CALL(astfold_expr, expr_ty, node_->v.Attribute.value); break; diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c index 6565b6b33ebd52..be83ea2781f8e5 100644 --- a/Python/ast_unparse.c +++ 
b/Python/ast_unparse.c @@ -887,6 +887,8 @@ append_ast_expr(_PyUnicodeWriter *writer, expr_ty e, int level) return append_ast_constant(writer, e->v.Constant.value); case JoinedStr_kind: return append_joinedstr(writer, e, false); + case TagString_kind: + assert(0); // TODO case FormattedValue_kind: return append_formattedvalue(writer, e); /* The following exprs can be assignment targets. */ diff --git a/Python/compile.c b/Python/compile.c index 04665f4a708677..5b4fd609c50536 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -4691,7 +4691,6 @@ check_caller(struct compiler *c, expr_ty e) case SetComp_kind: case GeneratorExp_kind: case JoinedStr_kind: - case TagString_kind: case FormattedValue_kind: return compiler_warn(c, "'%.200s' object is not callable; " "perhaps you missed a comma?", @@ -4751,7 +4750,6 @@ check_index(struct compiler *c, expr_ty e, expr_ty s) case List_kind: case ListComp_kind: case JoinedStr_kind: - case TagString_kind: case FormattedValue_kind: return compiler_warn(c, "%.200s indices must be integers or slices, " "not %.200s; " @@ -4910,6 +4908,37 @@ compiler_joined_str(struct compiler *c, expr_ty e) return 1; } +static int +compiler_tag_string(struct compiler *c, expr_ty e) +{ + if (e->kind == TagString_kind) { + expr_ty tag = e->v.TagString.tag; + expr_ty str = e->v.TagString.str; + if (tag->kind == Name_kind) { + if (str->kind == Constant_kind) { + PyObject *value = str->v.Constant.value; + PyObject *kind = str->v.Constant.kind; + if (kind == NULL && PyUnicode_CheckExact(value)) { + // Generate code for tag(value) + asdl_expr_seq *args = + _Py_asdl_expr_seq_new(1, c->c_arena); + if (args == NULL) + return 0; + asdl_seq_SET(args, 0, str); + asdl_keyword_seq *keywords = + _Py_asdl_keyword_seq_new(0, c->c_arena); + if (keywords == NULL) + return 0; + ADDOP(c, PUSH_NULL); + VISIT(c, expr, tag); + return compiler_call_helper(c, 0, args, keywords); + } + } + } + } + return compiler_error(c, "More complicated tag-string not yet supported"); +} + 
/* Used to implement f-strings. Format a single value. */ static int compiler_formatted_value(struct compiler *c, expr_ty e) @@ -5838,7 +5867,7 @@ compiler_visit_expr1(struct compiler *c, expr_ty e) case JoinedStr_kind: return compiler_joined_str(c, e); case TagString_kind: - return compiler_error(c, "TagString not yet supported"); + return compiler_tag_string(c, e); break; case FormattedValue_kind: return compiler_formatted_value(c, e); diff --git a/Python/symtable.c b/Python/symtable.c index 0b259b08b61f97..1690795733c804 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -1686,6 +1686,10 @@ symtable_visit_expr(struct symtable *st, expr_ty e) case JoinedStr_kind: VISIT_SEQ(st, expr, e->v.JoinedStr.values); break; + case TagString_kind: + VISIT(st, expr, e->v.TagString.tag); + VISIT(st, expr, e->v.TagString.str); + break; case Constant_kind: /* Nothing to do here. */ break; From a9922f4824c267ac88f11725b48a0ae4681901ae Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sun, 1 May 2022 18:21:33 -0600 Subject: [PATCH 03/12] Correctly unparse TagString node --- Python/ast_unparse.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c index be83ea2781f8e5..2583ae53b93dfc 100644 --- a/Python/ast_unparse.c +++ b/Python/ast_unparse.c @@ -662,6 +662,18 @@ append_joinedstr(_PyUnicodeWriter *writer, expr_ty e, bool is_format_spec) return result; } +static int +append_tagstring(_PyUnicodeWriter *writer, expr_ty e, int level) +{ + if (-1 == append_ast_expr(writer, e->v.TagString.tag, level)) + return -1; + if (-1 == append_charp(writer, " ")) + return -1; + if (-1 == append_ast_expr(writer, e->v.TagString.str, level)) + return -1; + return 0; +} + static int append_formattedvalue(_PyUnicodeWriter *writer, expr_ty e) { @@ -888,7 +900,7 @@ append_ast_expr(_PyUnicodeWriter *writer, expr_ty e, int level) case JoinedStr_kind: return append_joinedstr(writer, e, false); case TagString_kind: - 
assert(0); // TODO + return append_tagstring(writer, e, level); case FormattedValue_kind: return append_formattedvalue(writer, e); /* The following exprs can be assignment targets. */ From 12ced2c289c02a093ac3c932b3df911eb5ab4f74 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 2 May 2022 13:44:19 -0600 Subject: [PATCH 04/12] Accept only one string, make it a raw bytes --- Grammar/python.gram | 2 +- Parser/action_helpers.c | 5 ++++- Parser/parser.c | 14 ++++++-------- Parser/pegen.h | 2 +- Python/compile.c | 2 +- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/Grammar/python.gram b/Grammar/python.gram index c2eb667f06f30a..5e25a4d66cd6bf 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -796,7 +796,7 @@ slice[expr_ty]: | a=named_expression { a } atom[expr_ty]: - | a=NAME &STRING b=strings { _PyPegen_tag_string(p, a, b) } + | a=NAME b=STRING { _PyPegen_tag_string(p, a, b) } | NAME | 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) } | 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) } diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 37312ea11c2be1..7ac8576ca2c1d0 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -870,11 +870,14 @@ _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs) } expr_ty -_PyPegen_tag_string(Parser *p, expr_ty tag, expr_ty str) +_PyPegen_tag_string(Parser *p, expr_ty tag, Token *t) { // No prefixes (f, r, b, u) // Parse like fstring // Create a node similar to f-string AST + expr_ty str = _PyAST_Constant(t->bytes, NULL, t->lineno, + t->col_offset, t->end_lineno, + t->end_col_offset, p->arena); return _PyAST_TagString(tag, str, tag->lineno, tag->col_offset, str->end_lineno, str->end_col_offset, p->arena); diff --git a/Parser/parser.c b/Parser/parser.c index ad1196e8a213d1..38af34d1f951c7 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -14111,7 +14111,7 @@ slice_rule(Parser *p) } // atom: -// | NAME &STRING strings +// | NAME STRING // | NAME // | 'True' 
// | 'False' @@ -14144,23 +14144,21 @@ atom_rule(Parser *p) UNUSED(_start_lineno); // Only used by EXTRA macro int _start_col_offset = p->tokens[_mark]->col_offset; UNUSED(_start_col_offset); // Only used by EXTRA macro - { // NAME &STRING strings + { // NAME STRING if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME &STRING strings")); + D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "NAME STRING")); expr_ty a; expr_ty b; if ( (a = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(1, _PyPegen_string_token, p) - && - (b = strings_rule(p)) // strings + (b = _PyPegen_string_token(p)) // STRING ) { - D(fprintf(stderr, "%*c+ atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME &STRING strings")); + D(fprintf(stderr, "%*c+ atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME STRING")); _res = _PyPegen_tag_string ( p , a , b ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -14171,7 +14169,7 @@ atom_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s atom[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NAME &STRING strings")); + p->error_indicator ? "ERROR!" 
: "-", _mark, p->mark, "NAME STRING")); } { // NAME if (p->error_indicator) { diff --git a/Parser/pegen.h b/Parser/pegen.h index 31340e46dc821c..91e998a43530fa 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -317,7 +317,7 @@ expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena); expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *); -expr_ty _PyPegen_tag_string(Parser *p, expr_ty, expr_ty); +expr_ty _PyPegen_tag_string(Parser *p, expr_ty, Token *); expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty); expr_ty _PyPegen_ensure_real(Parser *p, expr_ty); asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *); diff --git a/Python/compile.c b/Python/compile.c index 5b4fd609c50536..b2392cdb11fd93 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -4918,7 +4918,7 @@ compiler_tag_string(struct compiler *c, expr_ty e) if (str->kind == Constant_kind) { PyObject *value = str->v.Constant.value; PyObject *kind = str->v.Constant.kind; - if (kind == NULL && PyUnicode_CheckExact(value)) { + if (kind == NULL && PyBytes_CheckExact(value)) { // Generate code for tag(value) asdl_expr_seq *args = _Py_asdl_expr_seq_new(1, c->c_arena); From 73f3c34429e6d5838d8c61081768942407bb32fe Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 2 May 2022 14:54:17 -0600 Subject: [PATCH 05/12] Parse the string as f-string --- Grammar/python.gram | 4 ++-- Parser/action_helpers.c | 18 +++++++++++------- Parser/parser.c | 4 ++-- Parser/pegen.h | 2 +- Parser/string_parser.c | 16 ++++++++++------ Parser/string_parser.h | 4 ++-- Python/compile.c | 32 ++++++++++++++------------------ 7 files changed, 42 insertions(+), 38 deletions(-) diff --git a/Grammar/python.gram b/Grammar/python.gram index 5e25a4d66cd6bf..3734a271119ddc 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -796,7 +796,7 @@ slice[expr_ty]: | a=named_expression { a } atom[expr_ty]: - | a=NAME 
b=STRING { _PyPegen_tag_string(p, a, b) } + | a=NAME b=STRING { _PyPegen_tag_string(p, a, (Token *)b) } | NAME | 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) } | 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) } @@ -871,7 +871,7 @@ lambda_param[arg_ty]: a=NAME { _PyAST_arg(a->v.Name.id, NULL, NULL, EXTRA) } # LITERALS # ======== -strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a) } +strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a, 0) } list[expr_ty]: | '[' a=[star_named_expressions] ']' { _PyAST_List(a, Load, EXTRA) } diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 7ac8576ca2c1d0..b0bb41e1b18a46 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -870,14 +870,18 @@ _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs) } expr_ty -_PyPegen_tag_string(Parser *p, expr_ty tag, Token *t) +_PyPegen_tag_string(Parser *p, expr_ty tag, Token *tok) { // No prefixes (f, r, b, u) // Parse like fstring // Create a node similar to f-string AST - expr_ty str = _PyAST_Constant(t->bytes, NULL, t->lineno, - t->col_offset, t->end_lineno, - t->end_col_offset, p->arena); + asdl_generic_seq *tokens = _Py_asdl_generic_seq_new(1, p->arena); + if (tokens == NULL) + return NULL; + asdl_seq_SET(tokens, 0, tok); + expr_ty str = _PyPegen_concatenate_strings(p, (asdl_seq *)tokens, 1); + if (str == NULL) + return NULL; return _PyAST_TagString(tag, str, tag->lineno, tag->col_offset, str->end_lineno, str->end_col_offset, p->arena); @@ -886,7 +890,7 @@ _PyPegen_tag_string(Parser *p, expr_ty tag, Token *t) expr_ty -_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings) +_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings, int tagged) { Py_ssize_t len = asdl_seq_LEN(strings); assert(len > 0); @@ -898,7 +902,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_seq *strings) PyObject *bytes_str = NULL; FstringParser state; - _PyPegen_FstringParser_Init(&state); + _PyPegen_FstringParser_Init(&state, 
tagged); for (Py_ssize_t i = 0; i < len; i++) { Token *t = asdl_seq_GET_UNTYPED(strings, i); @@ -909,7 +913,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_seq *strings) const char *fstr; Py_ssize_t fstrlen = -1; - if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) { + if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t, tagged) != 0) { goto error; } diff --git a/Parser/parser.c b/Parser/parser.c index 38af34d1f951c7..2fa4b362da794c 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -14159,7 +14159,7 @@ atom_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME STRING")); - _res = _PyPegen_tag_string ( p , a , b ); + _res = _PyPegen_tag_string ( p , a , ( Token* ) b ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; @@ -15488,7 +15488,7 @@ strings_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ strings[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "STRING+")); - _res = _PyPegen_concatenate_strings ( p , a ); + _res = _PyPegen_concatenate_strings ( p , a , 0 ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; diff --git a/Parser/pegen.h b/Parser/pegen.h index 91e998a43530fa..b813feeea8130d 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -316,7 +316,7 @@ asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *); expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena); -expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *); +expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *, int); expr_ty _PyPegen_tag_string(Parser *p, expr_ty, Token *); expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty); expr_ty _PyPegen_ensure_real(Parser *p, expr_ty); diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 9c12d8ca101d00..734f09fc78d720 100644 --- 
a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -166,7 +166,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) string object. Return 0 if no errors occurred. */ int _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, - const char **fstr, Py_ssize_t *fstrlen, Token *t) + const char **fstr, Py_ssize_t *fstrlen, Token *t, int tagged) { const char *s = PyBytes_AsString(t->bytes); if (s == NULL) { @@ -175,12 +175,16 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, size_t len; int quote = Py_CHARMASK(*s); - int fmode = 0; + int fmode = tagged; *bytesmode = 0; - *rawmode = 0; + *rawmode = tagged; *result = NULL; *fstr = NULL; if (Py_ISALPHA(quote)) { + if (tagged) { + RAISE_SYNTAX_ERROR("Cannot combine tag and letter prefix"); + return -1; + } while (!*bytesmode || !*rawmode) { if (quote == 'b' || quote == 'B') { quote =(unsigned char)*++s; @@ -1030,10 +1034,10 @@ FstringParser_check_invariants(FstringParser *state) #endif void -_PyPegen_FstringParser_Init(FstringParser *state) +_PyPegen_FstringParser_Init(FstringParser *state, int fmode) { state->last_str = NULL; - state->fmode = 0; + state->fmode = fmode; ExprList_Init(&state->expr_list); FstringParser_check_invariants(state); } @@ -1245,7 +1249,7 @@ fstring_parse(Parser *p, const char **str, const char *end, int raw, { FstringParser state; - _PyPegen_FstringParser_Init(&state); + _PyPegen_FstringParser_Init(&state, 0); if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl, first_token, t, last_token) < 0) { _PyPegen_FstringParser_Dealloc(&state); diff --git a/Parser/string_parser.h b/Parser/string_parser.h index 4a22f3d3086f47..e7fddc74e76b9b 100644 --- a/Parser/string_parser.h +++ b/Parser/string_parser.h @@ -33,9 +33,9 @@ typedef struct { int fmode; } FstringParser; -void _PyPegen_FstringParser_Init(FstringParser *); +void _PyPegen_FstringParser_Init(FstringParser *, int); int 
_PyPegen_parsestr(Parser *, int *, int *, PyObject **, - const char **, Py_ssize_t *, Token *); + const char **, Py_ssize_t *, Token *, int); int _PyPegen_FstringParser_ConcatFstring(Parser *, FstringParser *, const char **, const char *, int, int, Token *, Token *, Token *); diff --git a/Python/compile.c b/Python/compile.c index b2392cdb11fd93..eeb15b8bc9b3a5 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -4915,24 +4915,20 @@ compiler_tag_string(struct compiler *c, expr_ty e) expr_ty tag = e->v.TagString.tag; expr_ty str = e->v.TagString.str; if (tag->kind == Name_kind) { - if (str->kind == Constant_kind) { - PyObject *value = str->v.Constant.value; - PyObject *kind = str->v.Constant.kind; - if (kind == NULL && PyBytes_CheckExact(value)) { - // Generate code for tag(value) - asdl_expr_seq *args = - _Py_asdl_expr_seq_new(1, c->c_arena); - if (args == NULL) - return 0; - asdl_seq_SET(args, 0, str); - asdl_keyword_seq *keywords = - _Py_asdl_keyword_seq_new(0, c->c_arena); - if (keywords == NULL) - return 0; - ADDOP(c, PUSH_NULL); - VISIT(c, expr, tag); - return compiler_call_helper(c, 0, args, keywords); - } + if (str->kind == JoinedStr_kind) { + // Generate code for tag(str) + asdl_expr_seq *args = + _Py_asdl_expr_seq_new(1, c->c_arena); + if (args == NULL) + return 0; + asdl_seq_SET(args, 0, str); + asdl_keyword_seq *keywords = + _Py_asdl_keyword_seq_new(0, c->c_arena); + if (keywords == NULL) + return 0; + ADDOP(c, PUSH_NULL); + VISIT(c, expr, tag); + return compiler_call_helper(c, 0, args, keywords); } } } From 67175b0b35428ba91d9f2b3c1747d2ec42b39b09 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 2 May 2022 16:27:58 -0600 Subject: [PATCH 06/12] Start calling tag(*args) --- Python/compile.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index eeb15b8bc9b3a5..5ab49fd79b50ee 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -4916,19 +4916,14 @@ compiler_tag_string(struct 
compiler *c, expr_ty e) expr_ty str = e->v.TagString.str; if (tag->kind == Name_kind) { if (str->kind == JoinedStr_kind) { - // Generate code for tag(str) - asdl_expr_seq *args = - _Py_asdl_expr_seq_new(1, c->c_arena); - if (args == NULL) - return 0; - asdl_seq_SET(args, 0, str); + // Generate code for tag(str1, str2, ...) asdl_keyword_seq *keywords = _Py_asdl_keyword_seq_new(0, c->c_arena); if (keywords == NULL) return 0; ADDOP(c, PUSH_NULL); VISIT(c, expr, tag); - return compiler_call_helper(c, 0, args, keywords); + return compiler_call_helper(c, 0, str->v.JoinedStr.values, keywords); } } } From 1c36df7e8908b5fa075d55695c2275bfcc31a176 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 2 May 2022 17:43:38 -0600 Subject: [PATCH 07/12] Pass lambdas (not quite thunks, but a step in the right direction) --- Parser/action_helpers.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index b0bb41e1b18a46..200d3357deedf2 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -869,12 +869,21 @@ _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs) return new_seq; } +static expr_ty +lambdafy(Parser *p, expr_ty arg) +{ + assert(arg->kind == FormattedValue_kind); + arguments_ty args = _PyPegen_empty_arguments(p); + if (args == NULL) + return NULL; + return _PyAST_Lambda(args, arg, + arg->lineno, arg->col_offset, arg->end_lineno, arg->end_col_offset, + p->arena); +} + expr_ty _PyPegen_tag_string(Parser *p, expr_ty tag, Token *tok) { - // No prefixes (f, r, b, u) - // Parse like fstring - // Create a node similar to f-string AST asdl_generic_seq *tokens = _Py_asdl_generic_seq_new(1, p->arena); if (tokens == NULL) return NULL; @@ -882,13 +891,25 @@ _PyPegen_tag_string(Parser *p, expr_ty tag, Token *tok) expr_ty str = _PyPegen_concatenate_strings(p, (asdl_seq *)tokens, 1); if (str == NULL) return NULL; + if (str->kind == JoinedStr_kind) { + // 
Transform FormattedValue items into thunks + asdl_expr_seq *values = str->v.JoinedStr.values; + int nvalues = asdl_seq_LEN(values); + for (int i = 0; i < nvalues; i++) { + expr_ty value = asdl_seq_GET(values, i); + if (value->kind == FormattedValue_kind) { + value = lambdafy(p, value); + if (value == NULL) + return NULL; + asdl_seq_SET(values, i, value); + } + } + } return _PyAST_TagString(tag, str, tag->lineno, tag->col_offset, str->end_lineno, str->end_col_offset, p->arena); - } - expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *strings, int tagged) { From d5a0e64502f8b8f355750c6005fe50fae2c6552c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 2 May 2022 22:04:48 -0600 Subject: [PATCH 08/12] Lambdafy the expression only, excluding conversion, spec --- Parser/action_helpers.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 200d3357deedf2..b6e1a4aabe26eb 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -872,7 +872,6 @@ _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs) static expr_ty lambdafy(Parser *p, expr_ty arg) { - assert(arg->kind == FormattedValue_kind); arguments_ty args = _PyPegen_empty_arguments(p); if (args == NULL) return NULL; @@ -898,10 +897,10 @@ _PyPegen_tag_string(Parser *p, expr_ty tag, Token *tok) for (int i = 0; i < nvalues; i++) { expr_ty value = asdl_seq_GET(values, i); if (value->kind == FormattedValue_kind) { - value = lambdafy(p, value); - if (value == NULL) + expr_ty lambda = lambdafy(p, value->v.FormattedValue.value); + if (lambda == NULL) return NULL; - asdl_seq_SET(values, i, value); + asdl_seq_SET(values, i, lambda); } } } From 94c73358c4c4d271ac23ae928dc7c0b3db3394cb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 2 May 2022 22:30:09 -0600 Subject: [PATCH 09/12] Generate a (func, rawstr, conv, spec) tuple for thunks --- Parser/action_helpers.c | 57 ++++++++++++++++++++++++++++++++++++++--- 1
file changed, 54 insertions(+), 3 deletions(-) diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index b6e1a4aabe26eb..7f838e74144666 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -891,16 +891,67 @@ _PyPegen_tag_string(Parser *p, expr_ty tag, Token *tok) if (str == NULL) return NULL; if (str->kind == JoinedStr_kind) { - // Transform FormattedValue items into thunks + // Transform FormattedValue items into thunks (for now, tuples) asdl_expr_seq *values = str->v.JoinedStr.values; int nvalues = asdl_seq_LEN(values); + expr_ty none = NULL; for (int i = 0; i < nvalues; i++) { expr_ty value = asdl_seq_GET(values, i); if (value->kind == FormattedValue_kind) { - expr_ty lambda = lambdafy(p, value->v.FormattedValue.value); + if (none == NULL) { + none = _PyAST_Constant(Py_None, NULL, + str->lineno, str->col_offset, + str->end_lineno, str->end_col_offset, + p->arena); + if (none == NULL) + return NULL; + } + expr_ty expr = value->v.FormattedValue.value; + expr_ty lambda = lambdafy(p, expr); if (lambda == NULL) return NULL; - asdl_seq_SET(values, i, lambda); + constant rawstr = _PyAST_ExprAsUnicode(expr); + if (rawstr == NULL) + return NULL; + expr_ty raw = _PyAST_Constant(rawstr, NULL, + expr->lineno, expr->col_offset, + expr->end_lineno, expr->end_col_offset, + p->arena); + if (raw == NULL) + return NULL; + expr_ty conv = none; + int conversion = value->v.FormattedValue.conversion; + if (conversion >= 0) { + char buf[1]; + buf[0] = conversion; + constant uconv = _PyUnicode_FromASCII(buf, 1); + if (uconv == NULL) + return NULL; + conv = _PyAST_Constant(uconv, NULL, + expr->lineno, expr->col_offset, + expr->end_lineno, expr->end_col_offset, + p->arena); + if (conv == NULL) + return NULL; + } + expr_ty spec = value->v.FormattedValue.format_spec; + if (spec == NULL) { + spec = none; + } + asdl_expr_seq *elts = _Py_asdl_expr_seq_new(4, p->arena); + if (elts == NULL) + return NULL; + asdl_seq_SET(elts, 0, lambda); + asdl_seq_SET(elts, 1, raw); 
+ asdl_seq_SET(elts, 2, conv); + asdl_seq_SET(elts, 3, spec); + expr_ty tuple = _PyAST_Tuple(elts, Load, + value->lineno, value->col_offset, + value->end_lineno, value->end_col_offset, + p->arena); + if (tuple == NULL) + return NULL; + asdl_seq_SET(values, i, tuple); } } } From 399eaba32d4f390be5da2dbe497cd6aaa898f4f4 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 2 May 2022 23:01:14 -0600 Subject: [PATCH 10/12] Restore original signature of _PyPegen_FstringParser_Init --- Parser/action_helpers.c | 2 +- Parser/string_parser.c | 6 +++--- Parser/string_parser.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 7f838e74144666..27615858e63446 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -973,7 +973,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_seq *strings, int tagged) PyObject *bytes_str = NULL; FstringParser state; - _PyPegen_FstringParser_Init(&state, tagged); + _PyPegen_FstringParser_Init(&state); for (Py_ssize_t i = 0; i < len; i++) { Token *t = asdl_seq_GET_UNTYPED(strings, i); diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 734f09fc78d720..e2a0839b365de7 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -1034,10 +1034,10 @@ FstringParser_check_invariants(FstringParser *state) #endif void -_PyPegen_FstringParser_Init(FstringParser *state, int fmode) +_PyPegen_FstringParser_Init(FstringParser *state) { state->last_str = NULL; - state->fmode = fmode; + state->fmode = 0; ExprList_Init(&state->expr_list); FstringParser_check_invariants(state); } @@ -1249,7 +1249,7 @@ fstring_parse(Parser *p, const char **str, const char *end, int raw, { FstringParser state; - _PyPegen_FstringParser_Init(&state, 0); + _PyPegen_FstringParser_Init(&state); if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl, first_token, t, last_token) < 0) { _PyPegen_FstringParser_Dealloc(&state); diff --git 
a/Parser/string_parser.h b/Parser/string_parser.h index e7fddc74e76b9b..bde64bde551c33 100644 --- a/Parser/string_parser.h +++ b/Parser/string_parser.h @@ -33,7 +33,7 @@ typedef struct { int fmode; } FstringParser; -void _PyPegen_FstringParser_Init(FstringParser *, int); +void _PyPegen_FstringParser_Init(FstringParser *); int _PyPegen_parsestr(Parser *, int *, int *, PyObject **, const char **, Py_ssize_t *, Token *, int); int _PyPegen_FstringParser_ConcatFstring(Parser *, FstringParser *, const char **, From c78e4a10c187fdc7cbf54450efc182fe104aefd8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 3 May 2022 22:23:42 -0600 Subject: [PATCH 11/12] Fix unparsing of tagstring --- Python/ast_unparse.c | 68 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c index 2583ae53b93dfc..dca9b2817e115f 100644 --- a/Python/ast_unparse.c +++ b/Python/ast_unparse.c @@ -21,7 +21,7 @@ expr_as_unicode(expr_ty e, int level); static int append_ast_expr(_PyUnicodeWriter *writer, expr_ty e, int level); static int -append_joinedstr(_PyUnicodeWriter *writer, expr_ty e, bool is_format_spec); +append_joinedstr(_PyUnicodeWriter *writer, expr_ty e, bool is_format_spec, bool is_tag_str); static int append_formattedvalue(_PyUnicodeWriter *writer, expr_ty e); static int @@ -597,6 +597,41 @@ append_fstring_unicode(_PyUnicodeWriter *writer, PyObject *unicode) return result; } +static int +append_interpolation(_PyUnicodeWriter *writer, expr_ty e) +{ + APPEND_STR("{"); + if (e->kind == Tuple_kind) { + asdl_expr_seq *elts = e->v.Tuple.elts; + if (asdl_seq_LEN(elts) == 4) { + expr_ty raw = asdl_seq_GET(elts, 1); + if (raw->kind == Constant_kind) { + constant c = raw->v.Constant.value; + if (PyUnicode_CheckExact(c)) { + if (-1 == _PyUnicodeWriter_WriteStr(writer, c)) + return -1; + } + } + expr_ty conv = asdl_seq_GET(elts, 2); + if (conv->kind == Constant_kind) { + constant c = 
conv->v.Constant.value; + if (PyUnicode_CheckExact(c)) { + APPEND_STR("!"); + if (-1 == _PyUnicodeWriter_WriteStr(writer, c)) + return -1; + } + } + expr_ty spec = asdl_seq_GET(elts, 3); + if (spec->kind == JoinedStr_kind) { + APPEND_STR(":"); + if (-1 == append_joinedstr(writer, spec, true, false)) + return -1; + } + } + } + APPEND_STR_FINISH("}"); +} + static int append_fstring_element(_PyUnicodeWriter *writer, expr_ty e, bool is_format_spec) { @@ -604,9 +639,11 @@ append_fstring_element(_PyUnicodeWriter *writer, expr_ty e, bool is_format_spec) case Constant_kind: return append_fstring_unicode(writer, e->v.Constant.value); case JoinedStr_kind: - return append_joinedstr(writer, e, is_format_spec); + return append_joinedstr(writer, e, is_format_spec, false); case FormattedValue_kind: return append_formattedvalue(writer, e); + case Tuple_kind: + return append_interpolation(writer, e); default: PyErr_SetString(PyExc_SystemError, "unknown expression kind inside f-string"); @@ -640,16 +677,19 @@ build_fstring_body(asdl_expr_seq *values, bool is_format_spec) } static int -append_joinedstr(_PyUnicodeWriter *writer, expr_ty e, bool is_format_spec) +append_joinedstr(_PyUnicodeWriter *writer, expr_ty e, bool is_format_spec, bool is_tag_str) { - int result = -1; PyObject *body = build_fstring_body(e->v.JoinedStr.values, is_format_spec); if (!body) { return -1; } + int result = 0; if (!is_format_spec) { - if (-1 != append_charp(writer, "f") && + if (!is_tag_str) { + result = append_charp(writer, "f"); + } + if (-1 != result && -1 != append_repr(writer, body)) { result = 0; @@ -663,14 +703,14 @@ append_joinedstr(_PyUnicodeWriter *writer, expr_ty e, bool is_format_spec) } static int -append_tagstring(_PyUnicodeWriter *writer, expr_ty e, int level) +append_tagstring(_PyUnicodeWriter *writer, expr_ty e) { - if (-1 == append_ast_expr(writer, e->v.TagString.tag, level)) - return -1; - if (-1 == append_charp(writer, " ")) - return -1; - if (-1 == append_ast_expr(writer, 
e->v.TagString.str, level)) - return -1; + APPEND_EXPR(e->v.TagString.tag, 0); + expr_ty str = e->v.TagString.str; + if (str->kind == JoinedStr_kind) { + if (-1 == append_joinedstr(writer, str, false, true)) + return -1; + } return 0; } @@ -898,9 +938,9 @@ append_ast_expr(_PyUnicodeWriter *writer, expr_ty e, int level) } return append_ast_constant(writer, e->v.Constant.value); case JoinedStr_kind: - return append_joinedstr(writer, e, false); + return append_joinedstr(writer, e, false, false); case TagString_kind: - return append_tagstring(writer, e, level); + return append_tagstring(writer, e); case FormattedValue_kind: return append_formattedvalue(writer, e); /* The following exprs can be assignment targets. */ From 1ec2fbe1bde02bcd495847b8ed73912c407cbdd8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 4 May 2022 07:29:04 -0600 Subject: [PATCH 12/12] Insist tag and str are flush together --- Parser/action_helpers.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 27615858e63446..71524e39555916 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -883,6 +883,13 @@ lambdafy(Parser *p, expr_ty arg) expr_ty _PyPegen_tag_string(Parser *p, expr_ty tag, Token *tok) { + if (tag->end_lineno != tok->lineno || + tag->end_col_offset != tok->col_offset) { + RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, + tag->end_lineno, tag->end_col_offset, + tok->lineno, tok->col_offset, + "cannot have space between tag and string"); + } asdl_generic_seq *tokens = _Py_asdl_generic_seq_new(1, p->arena); if (tokens == NULL) return NULL;