Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 374abde

Browse files
authored
gh-104400: pygettext: use an AST parser instead of a tokenizer (GH-104402)
This greatly simplifies the code and fixes many corner cases.
1 parent 1da412e commit 374abde
Copy full SHA for 374abde

File tree

Expand file tree / Collapse file tree

7 files changed

+177
-254
lines changed
Filter options
Expand file tree / Collapse file tree

7 files changed

+177
-254
lines changed

‎Lib/test/test_tools/i18n_data/docstrings.pot

Copy file name to clipboard | Expand all lines: Lib/test/test_tools/i18n_data/docstrings.pot
+21 -7 | Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,26 +15,40 @@ msgstr ""
1515
"Generated-By: pygettext.py 1.5\n"
1616

1717

18-
#: docstrings.py:7
18+
#: docstrings.py:1
19+
#, docstring
20+
msgid "Module docstring"
21+
msgstr ""
22+
23+
#: docstrings.py:9
1924
#, docstring
2025
msgid ""
2126
msgstr ""
2227

23-
#: docstrings.py:18
28+
#: docstrings.py:15
29+
#, docstring
30+
msgid "docstring"
31+
msgstr ""
32+
33+
#: docstrings.py:20
2434
#, docstring
2535
msgid ""
2636
"multiline\n"
27-
" docstring\n"
28-
" "
37+
"docstring"
2938
msgstr ""
3039

31-
#: docstrings.py:25
40+
#: docstrings.py:27
3241
#, docstring
3342
msgid "docstring1"
3443
msgstr ""
3544

36-
#: docstrings.py:30
45+
#: docstrings.py:38
46+
#, docstring
47+
msgid "nested docstring"
48+
msgstr ""
49+
50+
#: docstrings.py:43
3751
#, docstring
38-
msgid "Hello, {}!"
52+
msgid "nested class docstring"
3953
msgstr ""
4054

‎Lib/test/test_tools/i18n_data/docstrings.py

Copy file name to clipboard | Expand all lines: Lib/test/test_tools/i18n_data/docstrings.py
+7 -5 | Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
"""Module docstring"""
2+
13
# Test docstring extraction
24
from gettext import gettext as _
35

@@ -10,10 +12,10 @@ def test(x):
1012
# Leading empty line
1113
def test2(x):
1214

13-
"""docstring""" # XXX This should be extracted but isn't.
15+
"""docstring"""
1416

1517

16-
# XXX Multiline docstrings should be cleaned with `inspect.cleandoc`.
18+
# Multiline docstrings are cleaned with `inspect.cleandoc`.
1719
def test3(x):
1820
"""multiline
1921
docstring
@@ -27,15 +29,15 @@ def test4(x):
2729

2830

2931
def test5(x):
30-
"""Hello, {}!""".format("world!") # XXX This should not be extracted.
32+
"""Hello, {}!""".format("world!") # This should not be extracted.
3133

3234

3335
# Nested docstrings
3436
def test6(x):
3537
def inner(y):
36-
"""nested docstring""" # XXX This should be extracted but isn't.
38+
"""nested docstring"""
3739

3840

3941
class Outer:
4042
class Inner:
41-
"nested class docstring" # XXX This should be extracted but isn't.
43+
"nested class docstring"

‎Lib/test/test_tools/i18n_data/messages.pot

Copy file name to clipboard | Expand all lines: Lib/test/test_tools/i18n_data/messages.pot
+16 -6 | Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,22 +19,22 @@ msgstr ""
1919
msgid ""
2020
msgstr ""
2121

22-
#: messages.py:19 messages.py:20
22+
#: messages.py:19 messages.py:20 messages.py:21
2323
msgid "parentheses"
2424
msgstr ""
2525

26-
#: messages.py:23
26+
#: messages.py:24
2727
msgid "Hello, world!"
2828
msgstr ""
2929

30-
#: messages.py:26
30+
#: messages.py:27
3131
msgid ""
3232
"Hello,\n"
3333
" multiline!\n"
3434
msgstr ""
3535

3636
#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94
37-
#: messages.py:99
37+
#: messages.py:99 messages.py:100 messages.py:101
3838
msgid "foo"
3939
msgid_plural "foos"
4040
msgstr[0] ""
@@ -68,22 +68,32 @@ msgstr ""
6868
msgid "set"
6969
msgstr ""
7070

71-
#: messages.py:63
71+
#: messages.py:62 messages.py:63
7272
msgid "nested string"
7373
msgstr ""
7474

7575
#: messages.py:68
7676
msgid "baz"
7777
msgstr ""
7878

79+
#: messages.py:71 messages.py:75
80+
msgid "default value"
81+
msgstr ""
82+
7983
#: messages.py:91 messages.py:92 messages.py:95 messages.py:96
8084
msgctxt "context"
8185
msgid "foo"
8286
msgid_plural "foos"
8387
msgstr[0] ""
8488
msgstr[1] ""
8589

86-
#: messages.py:100
90+
#: messages.py:102
8791
msgid "domain foo"
8892
msgstr ""
8993

94+
#: messages.py:118 messages.py:119
95+
msgid "world"
96+
msgid_plural "worlds"
97+
msgstr[0] ""
98+
msgstr[1] ""
99+

‎Lib/test/test_tools/i18n_data/messages.py

Copy file name to clipboard | Expand all lines: Lib/test/test_tools/i18n_data/messages.py
+15 -6 | Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
# Extra parentheses
1919
(_("parentheses"))
2020
((_("parentheses")))
21+
_(("parentheses"))
2122

2223
# Multiline strings
2324
_("Hello, "
@@ -32,23 +33,22 @@
3233
_(None)
3334
_(1)
3435
_(False)
35-
_(("invalid"))
3636
_(["invalid"])
3737
_({"invalid"})
3838
_("string"[3])
3939
_("string"[:3])
4040
_({"string": "foo"})
4141

4242
# pygettext does not allow keyword arguments, but both xgettext and pybabel do
43-
_(x="kwargs work!")
43+
_(x="kwargs are not allowed!")
4444

4545
# Unusual, but valid arguments
4646
_("foo", "bar")
4747
_("something", x="something else")
4848

4949
# .format()
5050
_("Hello, {}!").format("world") # valid
51-
_("Hello, {}!".format("world")) # invalid, but xgettext and pybabel extract the first string
51+
_("Hello, {}!".format("world")) # invalid, but xgettext extracts the first string
5252

5353
# Nested structures
5454
_("1"), _("2")
@@ -59,7 +59,7 @@
5959

6060
# Nested functions and classes
6161
def test():
62-
_("nested string") # XXX This should be extracted but isn't.
62+
_("nested string")
6363
[_("nested string")]
6464

6565

@@ -68,11 +68,11 @@ def bar(self):
6868
return _("baz")
6969

7070

71-
def bar(x=_('default value')): # XXX This should be extracted but isn't.
71+
def bar(x=_('default value')):
7272
pass
7373

7474

75-
def baz(x=[_('default value')]): # XXX This should be extracted but isn't.
75+
def baz(x=[_('default value')]):
7676
pass
7777

7878

@@ -97,6 +97,8 @@ def _(x="don't extract me"):
9797

9898
# Complex arguments
9999
ngettext("foo", "foos", 42 + (10 - 20))
100+
ngettext("foo", "foos", *args)
101+
ngettext("foo", "foos", **kwargs)
100102
dgettext(["some", {"complex"}, ("argument",)], "domain foo")
101103

102104
# Invalid calls which are not extracted
@@ -108,3 +110,10 @@ def _(x="don't extract me"):
108110
dngettext('domain', 'foo')
109111
dpgettext('domain', 'context')
110112
dnpgettext('domain', 'context', 'foo')
113+
dgettext(*args, 'foo')
114+
dpgettext(*args, 'context', 'foo')
115+
dnpgettext(*args, 'context', 'foo', 'foos')
116+
117+
# f-strings
118+
f"Hello, {_('world')}!"
119+
f"Hello, {ngettext('world', 'worlds', 3)}!"

‎Lib/test/test_tools/test_i18n.py

Copy file name to clipboard | Expand all lines: Lib/test/test_tools/test_i18n.py
+26 -2 | Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ def assert_POT_equal(self, expected, actual):
8787
self.maxDiff = None
8888
self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))
8989

90-
def extract_from_str(self, module_content, *, args=(), strict=True):
90+
def extract_from_str(self, module_content, *, args=(), strict=True, with_stderr=False):
9191
"""Return all msgids extracted from module_content."""
9292
filename = 'test.py'
9393
with temp_cwd(None):
@@ -98,12 +98,18 @@ def extract_from_str(self, module_content, *, args=(), strict=True):
9898
self.assertEqual(res.err, b'')
9999
with open('messages.pot', encoding='utf-8') as fp:
100100
data = fp.read()
101-
return self.get_msgids(data)
101+
msgids = self.get_msgids(data)
102+
if not with_stderr:
103+
return msgids
104+
return msgids, res.err
102105

103106
def extract_docstrings_from_str(self, module_content):
104107
"""Return all docstrings extracted from module_content."""
105108
return self.extract_from_str(module_content, args=('--docstrings',), strict=False)
106109

110+
def get_stderr(self, module_content):
111+
return self.extract_from_str(module_content, strict=False, with_stderr=True)[1]
112+
107113
def test_header(self):
108114
"""Make sure the required fields are in the header, according to:
109115
http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
@@ -407,6 +413,24 @@ def test_files_list(self):
407413
self.assertIn(f'msgid "{text2}"', data)
408414
self.assertNotIn(text3, data)
409415

416+
def test_error_messages(self):
417+
"""Test that pygettext outputs error messages to stderr."""
418+
stderr = self.get_stderr(dedent('''\
419+
_(1+2)
420+
ngettext('foo')
421+
dgettext(*args, 'foo')
422+
'''))
423+
424+
# Normalize line endings on Windows
425+
stderr = stderr.decode('utf-8').replace('\r', '')
426+
427+
self.assertEqual(
428+
stderr,
429+
"*** test.py:1: Expected a string constant for argument 1, got 1 + 2\n"
430+
"*** test.py:2: Expected at least 2 positional argument(s) in gettext call, got 1\n"
431+
"*** test.py:3: Variable positional arguments are not allowed in gettext calls\n"
432+
)
433+
410434

411435
def update_POT_snapshots():
412436
for input_file in DATA_DIR.glob('*.py'):
+1 | Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Fix several bugs in extraction by switching to an AST parser in :program:`pygettext`.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.