Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit a10babb

Browse files
authored
Implement soft keywords (hand-written and code generation) (python#129)
1 parent 6c50468 commit a10babb
Copy full SHA for a10babb

File tree

Expand file tree / Collapse file tree

5 files changed

+58
-10
lines changed
Filter options
Expand file tree / Collapse file tree

5 files changed

+58
-10
lines changed

‎Parser/pegen/vm.c

Copy file name to clipboard | Expand all lines: Parser/pegen/vm.c
+4 | Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,10 @@ run_vm(Parser *p, Rule rules[], int root)
153153
oparg = f->rule->opcodes[f->iop++];
154154
v = _PyPegen_expect_token(p, oparg);
155155
break;
156+
case OP_SOFT_KEYWORD:
157+
oparg = f->rule->opcodes[f->iop++];
158+
v = _PyPegen_expect_soft_keyword(p, soft_keywords[oparg]);
159+
break;
156160
case OP_RULE:
157161
oparg = f->rule->opcodes[f->iop++];
158162
Rule *rule = &rules[oparg];

‎Parser/pegen/vm.h

Copy file name to clipboard | Expand all lines: Parser/pegen/vm.h
+2 | Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ typedef enum _opcodes {
1212
OP_SUCCESS,
1313
OP_FAILURE,
1414
// The rest have an argument
15+
OP_SOFT_KEYWORD,
1516
OP_TOKEN,
1617
OP_RULE,
1718
OP_RETURN,
@@ -31,6 +32,7 @@ static char *opcode_names[] = {
3132
"OP_SUCCESS",
3233
"OP_FAILURE",
3334
// The rest have an argument
35+
"OP_SOFT_KEYWORD",
3436
"OP_TOKEN",
3537
"OP_RULE",
3638
"OP_RETURN",

‎Parser/pegen/vmparse.h

Copy file name to clipboard | Expand all lines: Parser/pegen/vmparse.h
+15 -3 | Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,14 @@ static KeywordToken *reserved_keywords[] = {
88
},
99
};
1010

11+
enum {
12+
SK___PEG_PARSER__,
13+
};
14+
15+
static const char *soft_keywords[] = {
16+
"__peg_parser__",
17+
};
18+
1119
enum {
1220
R_START,
1321
R_STMT,
@@ -33,6 +41,7 @@ enum {
3341
A_FACTOR_1,
3442
A_FACTOR_2,
3543
A_FACTOR_3,
44+
A_FACTOR_4,
3645
A__GATHER_2_0,
3746
A__GATHER_2_1,
3847
};
@@ -78,12 +87,13 @@ static Rule all_rules[] = {
7887
},
7988
{"factor",
8089
R_FACTOR,
81-
{0, 8, 16, 19, -1},
90+
{0, 8, 16, 19, 23, -1},
8291
{
8392
OP_TOKEN, 7, OP_RULE, R_EXPR, OP_TOKEN, 8, OP_RETURN, A_FACTOR_0,
8493
OP_TOKEN, 9, OP_RULE, R__GATHER_2, OP_TOKEN, 10, OP_RETURN, A_FACTOR_1,
8594
OP_NUMBER, OP_RETURN, A_FACTOR_2,
86-
OP_NAME, OP_RETURN, A_FACTOR_3,
95+
OP_SOFT_KEYWORD, SK___PEG_PARSER__, OP_RETURN, A_FACTOR_3,
96+
OP_NAME, OP_RETURN, A_FACTOR_4,
8797
},
8898
},
8999
{"root",
@@ -132,7 +142,7 @@ call_action(Parser *p, Frame *_f, int _iaction)
132142
case A_EXPR_1:
133143
case A_TERM_1:
134144
case A_FACTOR_2:
135-
case A_FACTOR_3:
145+
case A_FACTOR_4:
136146
case A__GATHER_2_0:
137147
case A__GATHER_2_1:
138148
return _f->vals[0];
@@ -146,6 +156,8 @@ call_action(Parser *p, Frame *_f, int _iaction)
146156
return _f->vals[1];
147157
case A_FACTOR_1:
148158
return _Py_List ( _f->vals[1] , Load , EXTRA );
159+
case A_FACTOR_3:
160+
return RAISE_SYNTAX_ERROR("You found it!");
149161
default:
150162
assert(0);
151163
}

‎Tools/peg_generator/data/simple.gram

Copy file name to clipboard | Expand all lines: Tools/peg_generator/data/simple.gram
+1 | Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,4 +14,5 @@ factor:
1414
| '(' a=expr ')' { a }
1515
| '[' a=','.expr+ ']' { _Py_List(a, Load, EXTRA) }
1616
| NUMBER
17+
| "__peg_parser__" { RAISE_SYNTAX_ERROR("You found it!") }
1718
| NAME

‎Tools/peg_generator/pegen/vm_generator.py

Copy file name to clipboard | Expand all lines: Tools/peg_generator/pegen/vm_generator.py
+36 -7 | Lines changed: 36 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -68,17 +68,27 @@ def __init__(
6868
self.gen = parser_generator
6969
self.cache: Dict[Any, Any] = {}
7070
self.keyword_cache: Dict[str, int] = {}
71+
self.soft_keyword_cache: List[str] = []
7172

72-
def keyword_helper(self, keyword: str) -> int:
73+
def keyword_helper(self, keyword: str) -> Tuple[str, int]:
7374
if keyword not in self.keyword_cache:
7475
self.keyword_cache[keyword] = self.gen.keyword_type()
75-
return self.keyword_cache[keyword]
76+
return "OP_TOKEN", self.keyword_cache[keyword]
7677

77-
def visit_StringLeaf(self, node: StringLeaf) -> int:
78+
def soft_keyword_helper(self, keyword: str) -> Tuple[str, str]:
    """Record *keyword* as a soft keyword and return its opcode pair.

    The keyword is appended to ``self.soft_keyword_cache`` the first time
    it is seen; later calls with the same keyword leave the list untouched,
    so the list keeps first-seen order without duplicates.

    Returns:
        A ``(opcode, argument)`` pair: the literal ``"OP_SOFT_KEYWORD"``
        and the symbolic name ``SK_<KEYWORD IN UPPER CASE>``.
    """
    seen = self.soft_keyword_cache
    already_registered = keyword in seen
    if not already_registered:
        seen.append(keyword)
    enum_member = "SK_" + keyword.upper()
    return "OP_SOFT_KEYWORD", enum_member
82+
83+
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, Union[str, int]]:
7884
val = ast.literal_eval(node.value)
7985
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
80-
return self.keyword_helper(val)
81-
return token.EXACT_TOKEN_TYPES[val] # type: ignore [attr-defined]
86+
if node.value.endswith("'"):
87+
return self.keyword_helper(val)
88+
else:
89+
return self.soft_keyword_helper(val)
90+
tok_num = token.EXACT_TOKEN_TYPES[val]
91+
return "OP_TOKEN", token.tok_name[tok_num]
8292

8393
def visit_Repeat0(self, node: Repeat0) -> None:
8494
if node in self.cache:
@@ -133,6 +143,7 @@ def generate(self, filename: str) -> None:
133143
self.collect_todo()
134144
self.gather_actions()
135145
self._setup_keywords()
146+
self._setup_soft_keywords()
136147

137148
self.print("enum {")
138149
with self.indent():
@@ -194,6 +205,24 @@ def _setup_keywords(self) -> None:
194205
self.print("};")
195206
self.print()
196207

208+
def _setup_soft_keywords(self) -> None:
    """Print the C declarations that describe the grammar's soft keywords.

    Reads the keyword strings collected in
    ``self.callmakervisitor.soft_keyword_cache`` and prints, in that order:

    * an anonymous ``enum`` with one ``SK_<KEYWORD IN UPPER CASE>`` member
      per keyword, and
    * the ``soft_keywords`` string table, one quoted entry per keyword in
      the same order as the enum members.

    The table is printed even when the grammar has no soft keywords,
    because the VM's ``OP_SOFT_KEYWORD`` case refers to ``soft_keywords``
    unconditionally; without a declaration the generated header would not
    compile for such grammars.  In that case the table holds a single
    ``NULL`` placeholder and the enum is omitted, since C allows neither
    an empty enumerator list nor an empty array initializer.
    """
    soft_keywords = self.callmakervisitor.soft_keyword_cache

    if soft_keywords:
        self.print("enum {")
        with self.indent():
            for soft_keyword in soft_keywords:
                self.print(f"SK_{soft_keyword.upper()},")
        self.print("};")
        self.print()

    self.print("static const char *soft_keywords[] = {")
    with self.indent():
        if soft_keywords:
            for soft_keyword in soft_keywords:
                self.print(f'"{soft_keyword}",')
        else:
            # Placeholder entry: keeps the symbol defined for the VM while
            # avoiding an (invalid) empty initializer list.
            self.print("NULL,")
    self.print("};")
    self.print()
225+
197226
def print_action_cases(self) -> None:
198227
unique_actions: Dict[str, List[str]] = defaultdict(list)
199228
for actionname, action in self.actions.items():
@@ -310,8 +339,8 @@ def visit_NameLeaf(self, node: NameLeaf) -> None:
310339
self.add_opcode("OP_RULE", self._get_rule_opcode(name))
311340

312341
def visit_StringLeaf(self, node: StringLeaf) -> None:
313-
token_type = self.callmakervisitor.visit(node)
314-
self.add_opcode("OP_TOKEN", token_type)
342+
op_pair = self.callmakervisitor.visit(node)
343+
self.add_opcode(*op_pair)
315344

316345
def handle_loop_rhs(
317346
self, node: Rhs, opcodes_by_alt: Dict[int, List[str]], collect_opcode: str,

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.