Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 6952482

Browse files
bpo-33189: pygettext.py now accepts only literal strings as docstrings
and translatable strings, and rejects bytes literals and f-string expressions. (GH-6364)
1 parent b7e1eff commit 6952482
Copy full SHA for 6952482

File tree

Expand file tree / Collapse file tree

3 files changed

+76
-11
lines changed
Filter options
Expand file tree / Collapse file tree

3 files changed

+76
-11
lines changed

‎Lib/test/test_tools/test_i18n.py

Copy file name to clipboardExpand all lines: Lib/test/test_tools/test_i18n.py
+65-6Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import os
44
import sys
55
import unittest
6-
import textwrap
6+
from textwrap import dedent
77

88
from test.support.script_helper import assert_python_ok
99
from test.test_tools import skip_if_missing, toolsdir
@@ -109,25 +109,84 @@ def test_POT_Creation_Date(self):
109109
# This will raise if the date format does not exactly match.
110110
datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
111111

112+
def test_funcdocstring(self):
113+
for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
114+
with self.subTest(doc):
115+
msgids = self.extract_docstrings_from_str(dedent('''\
116+
def foo(bar):
117+
%s
118+
''' % doc))
119+
self.assertIn('doc', msgids)
120+
121+
def test_funcdocstring_bytes(self):
122+
msgids = self.extract_docstrings_from_str(dedent('''\
123+
def foo(bar):
124+
b"""doc"""
125+
'''))
126+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
127+
128+
def test_funcdocstring_fstring(self):
129+
msgids = self.extract_docstrings_from_str(dedent('''\
130+
def foo(bar):
131+
f"""doc"""
132+
'''))
133+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
134+
135+
def test_classdocstring(self):
136+
for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
137+
with self.subTest(doc):
138+
msgids = self.extract_docstrings_from_str(dedent('''\
139+
class C:
140+
%s
141+
''' % doc))
142+
self.assertIn('doc', msgids)
143+
144+
def test_classdocstring_bytes(self):
145+
msgids = self.extract_docstrings_from_str(dedent('''\
146+
class C:
147+
b"""doc"""
148+
'''))
149+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
150+
151+
def test_classdocstring_fstring(self):
152+
msgids = self.extract_docstrings_from_str(dedent('''\
153+
class C:
154+
f"""doc"""
155+
'''))
156+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
157+
158+
def test_msgid(self):
159+
msgids = self.extract_docstrings_from_str(
160+
'''_("""doc""" r'str' u"ing")''')
161+
self.assertIn('docstring', msgids)
162+
163+
def test_msgid_bytes(self):
164+
msgids = self.extract_docstrings_from_str('_(b"""doc""")')
165+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
166+
167+
def test_msgid_fstring(self):
168+
msgids = self.extract_docstrings_from_str('_(f"""doc""")')
169+
self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
170+
112171
def test_funcdocstring_annotated_args(self):
113172
""" Test docstrings for functions with annotated args """
114-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
173+
msgids = self.extract_docstrings_from_str(dedent('''\
115174
def foo(bar: str):
116175
"""doc"""
117176
'''))
118177
self.assertIn('doc', msgids)
119178

120179
def test_funcdocstring_annotated_return(self):
121180
""" Test docstrings for functions with annotated return type """
122-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
181+
msgids = self.extract_docstrings_from_str(dedent('''\
123182
def foo(bar) -> str:
124183
"""doc"""
125184
'''))
126185
self.assertIn('doc', msgids)
127186

128187
def test_funcdocstring_defvalue_args(self):
129188
""" Test docstring for functions with default arg values """
130-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
189+
msgids = self.extract_docstrings_from_str(dedent('''\
131190
def foo(bar=()):
132191
"""doc"""
133192
'''))
@@ -137,7 +196,7 @@ def test_funcdocstring_multiple_funcs(self):
137196
""" Test docstring extraction for multiple functions combining
138197
annotated args, annotated return types and default arg values
139198
"""
140-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
199+
msgids = self.extract_docstrings_from_str(dedent('''\
141200
def foo1(bar: tuple=()) -> str:
142201
"""doc1"""
143202
@@ -155,7 +214,7 @@ def test_classdocstring_early_colon(self):
155214
""" Test docstring extraction for a class with colons occurring within
156215
the parentheses.
157216
"""
158-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
217+
msgids = self.extract_docstrings_from_str(dedent('''\
159218
class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
160219
"""doc"""
161220
'''))
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:program:`pygettext.py` now recognizes only literal strings as docstrings
2+
and translatable strings, and rejects bytes literals and f-string expressions.

‎Tools/i18n/pygettext.py

Copy file name to clipboardExpand all lines: Tools/i18n/pygettext.py
+9-5Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
232232
return ''.join(escapes[b] for b in s.encode(encoding))
233233

234234

235+
def is_literal_string(s):
236+
return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
237+
238+
235239
def safe_eval(s):
236240
# unwrap quotes, safely
237241
return eval(s, {'__builtins__':{}}, {})
@@ -317,8 +321,8 @@ def __init__(self, options):
317321
def __call__(self, ttype, tstring, stup, etup, line):
318322
# dispatch
319323
## import token
320-
## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
321-
## 'tstring:', tstring
324+
## print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
325+
## file=sys.stderr)
322326
self.__state(ttype, tstring, stup[0])
323327

324328
def __waiting(self, ttype, tstring, lineno):
@@ -327,7 +331,7 @@ def __waiting(self, ttype, tstring, lineno):
327331
if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
328332
# module docstring?
329333
if self.__freshmodule:
330-
if ttype == tokenize.STRING:
334+
if ttype == tokenize.STRING and is_literal_string(tstring):
331335
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
332336
self.__freshmodule = 0
333337
elif ttype not in (tokenize.COMMENT, tokenize.NL):
@@ -353,7 +357,7 @@ def __suiteseen(self, ttype, tstring, lineno):
353357

354358
def __suitedocstring(self, ttype, tstring, lineno):
355359
# ignore any intervening noise
356-
if ttype == tokenize.STRING:
360+
if ttype == tokenize.STRING and is_literal_string(tstring):
357361
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
358362
self.__state = self.__waiting
359363
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -378,7 +382,7 @@ def __openseen(self, ttype, tstring, lineno):
378382
if self.__data:
379383
self.__addentry(EMPTYSTRING.join(self.__data))
380384
self.__state = self.__waiting
381-
elif ttype == tokenize.STRING:
385+
elif ttype == tokenize.STRING and is_literal_string(tstring):
382386
self.__data.append(safe_eval(tstring))
383387
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
384388
token.NEWLINE, tokenize.NL]:

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.