Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 6f87093

Browse filesBrowse files
bpo-33189: pygettext.py now accepts only literal strings as docstrings and translatable strings, and rejects bytes literals and f-string expressions. (GH-6364)
(cherry picked from commit 6952482) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent fc8693d commit 6f87093
Copy full SHA for 6f87093

File tree

Expand file treeCollapse file tree

3 files changed

+76
-11
lines changed
Filter options
Expand file treeCollapse file tree

3 files changed

+76
-11
lines changed

‎Lib/test/test_tools/test_i18n.py

Copy file name to clipboardExpand all lines: Lib/test/test_tools/test_i18n.py
+65-6Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import os
44
import unittest
5-
import textwrap
5+
from textwrap import dedent
66

77
from test.support.script_helper import assert_python_ok
88
from test.test_tools import skip_if_missing, toolsdir
@@ -107,25 +107,84 @@ def test_POT_Creation_Date(self):
107107
# This will raise if the date format does not exactly match.
108108
datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')
109109

110+
def test_funcdocstring(self):
    """ Test that plain, raw and u-prefixed literals are extracted as
    function docstrings
    """
    for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
        # One sub-test per literal spelling so a failure names the prefix.
        with self.subTest(doc):
            msgids = self.extract_docstrings_from_str(dedent('''\
            def foo(bar):
                %s
            ''' % doc))
            self.assertIn('doc', msgids)
118+
119+
def test_funcdocstring_bytes(self):
    """ Test that a bytes literal is not extracted as a function docstring """
    msgids = self.extract_docstrings_from_str(dedent('''\
    def foo(bar):
        b"""doc"""
    '''))
    # No extracted msgid may contain the text of the bytes literal.
    self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
125+
126+
def test_funcdocstring_fstring(self):
    """ Test that an f-string is not extracted as a function docstring """
    msgids = self.extract_docstrings_from_str(dedent('''\
    def foo(bar):
        f"""doc"""
    '''))
    # No extracted msgid may contain the text of the f-string.
    self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
132+
133+
def test_classdocstring(self):
    """ Test that plain, raw and u-prefixed literals are extracted as
    class docstrings
    """
    for doc in ('"""doc"""', "r'''doc'''", "R'doc'", 'u"doc"'):
        # One sub-test per literal spelling so a failure names the prefix.
        with self.subTest(doc):
            msgids = self.extract_docstrings_from_str(dedent('''\
            class C:
                %s
            ''' % doc))
            self.assertIn('doc', msgids)
141+
142+
def test_classdocstring_bytes(self):
    """ Test that a bytes literal is not extracted as a class docstring """
    msgids = self.extract_docstrings_from_str(dedent('''\
    class C:
        b"""doc"""
    '''))
    # No extracted msgid may contain the text of the bytes literal.
    self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
148+
149+
def test_classdocstring_fstring(self):
    """ Test that an f-string is not extracted as a class docstring """
    msgids = self.extract_docstrings_from_str(dedent('''\
    class C:
        f"""doc"""
    '''))
    # No extracted msgid may contain the text of the f-string.
    self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
155+
156+
def test_msgid(self):
    """ Test that adjacent plain, raw and u-prefixed literals inside _()
    are joined into a single msgid
    """
    msgids = self.extract_docstrings_from_str(
            '''_("""doc""" r'str' u"ing")''')
    self.assertIn('docstring', msgids)
160+
161+
def test_msgid_bytes(self):
    """ Test that a bytes literal inside _() is not extracted as a msgid """
    msgids = self.extract_docstrings_from_str('_(b"""doc""")')
    # No extracted msgid may contain the text of the bytes literal.
    self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
164+
165+
def test_msgid_fstring(self):
    """ Test that an f-string inside _() is not extracted as a msgid """
    msgids = self.extract_docstrings_from_str('_(f"""doc""")')
    # No extracted msgid may contain the text of the f-string.
    self.assertFalse([msgid for msgid in msgids if 'doc' in msgid])
168+
110169
def test_funcdocstring_annotated_args(self):
111170
""" Test docstrings for functions with annotated args """
112-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
171+
msgids = self.extract_docstrings_from_str(dedent('''\
113172
def foo(bar: str):
114173
"""doc"""
115174
'''))
116175
self.assertIn('doc', msgids)
117176

118177
def test_funcdocstring_annotated_return(self):
119178
""" Test docstrings for functions with annotated return type """
120-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
179+
msgids = self.extract_docstrings_from_str(dedent('''\
121180
def foo(bar) -> str:
122181
"""doc"""
123182
'''))
124183
self.assertIn('doc', msgids)
125184

126185
def test_funcdocstring_defvalue_args(self):
127186
""" Test docstring for functions with default arg values """
128-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
187+
msgids = self.extract_docstrings_from_str(dedent('''\
129188
def foo(bar=()):
130189
"""doc"""
131190
'''))
@@ -135,7 +194,7 @@ def test_funcdocstring_multiple_funcs(self):
135194
""" Test docstring extraction for multiple functions combining
136195
annotated args, annotated return types and default arg values
137196
"""
138-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
197+
msgids = self.extract_docstrings_from_str(dedent('''\
139198
def foo1(bar: tuple=()) -> str:
140199
"""doc1"""
141200
@@ -153,7 +212,7 @@ def test_classdocstring_early_colon(self):
153212
""" Test docstring extraction for a class with colons occuring within
154213
the parentheses.
155214
"""
156-
msgids = self.extract_docstrings_from_str(textwrap.dedent('''\
215+
msgids = self.extract_docstrings_from_str(dedent('''\
157216
class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
158217
"""doc"""
159218
'''))
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:program:`pygettext.py` now recognizes only literal strings as docstrings
2+
and translatable strings, and rejects bytes literals and f-string expressions.

‎Tools/i18n/pygettext.py

Copy file name to clipboardExpand all lines: Tools/i18n/pygettext.py
+9-5Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
232232
return ''.join(escapes[b] for b in s.encode(encoding))
233233

234234

235+
def is_literal_string(s):
    """Return True if *s* is the source text of a plain string literal.

    Accepted forms start with a quote character, or with exactly one of the
    prefixes r, R, u, U immediately followed by a quote. Everything else
    (bytes literals, f-strings, combined prefixes such as ``rb``) is
    rejected, as are empty or one-character inputs.
    """
    quotes = ("'", '"')
    # Slices and tuple membership keep short inputs safe: s[:1] / s[1:2] are
    # '' when the text is too short, and '' is never a member of a tuple
    # (whereas ``'' in 'abc'`` is True for str containment).
    if s[:1] in quotes:
        return True
    return s[:1] in ('r', 'R', 'u', 'U') and s[1:2] in quotes
237+
238+
235239
def safe_eval(s):
    """Evaluate the source text of a string literal and return its value.

    The text is evaluated with an empty ``__builtins__`` mapping and empty
    locals.

    NOTE(review): this is still ``eval()``; emptying the builtins restricts
    name lookup but is not a real sandbox. Only pass token text that has
    already been vetted as a literal string (the call sites shown in this
    change check ``is_literal_string()`` first).
    """
    # unwrap quotes, safely
    return eval(s, {'__builtins__':{}}, {})
@@ -317,8 +321,8 @@ def __init__(self, options):
317321
def __call__(self, ttype, tstring, stup, etup, line):
318322
# dispatch
319323
## import token
320-
## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
321-
## 'tstring:', tstring
324+
## print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
325+
## file=sys.stderr)
322326
self.__state(ttype, tstring, stup[0])
323327

324328
def __waiting(self, ttype, tstring, lineno):
@@ -327,7 +331,7 @@ def __waiting(self, ttype, tstring, lineno):
327331
if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
328332
# module docstring?
329333
if self.__freshmodule:
330-
if ttype == tokenize.STRING:
334+
if ttype == tokenize.STRING and is_literal_string(tstring):
331335
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
332336
self.__freshmodule = 0
333337
elif ttype not in (tokenize.COMMENT, tokenize.NL):
@@ -353,7 +357,7 @@ def __suiteseen(self, ttype, tstring, lineno):
353357

354358
def __suitedocstring(self, ttype, tstring, lineno):
355359
# ignore any intervening noise
356-
if ttype == tokenize.STRING:
360+
if ttype == tokenize.STRING and is_literal_string(tstring):
357361
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
358362
self.__state = self.__waiting
359363
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -378,7 +382,7 @@ def __openseen(self, ttype, tstring, lineno):
378382
if self.__data:
379383
self.__addentry(EMPTYSTRING.join(self.__data))
380384
self.__state = self.__waiting
381-
elif ttype == tokenize.STRING:
385+
elif ttype == tokenize.STRING and is_literal_string(tstring):
382386
self.__data.append(safe_eval(tstring))
383387
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
384388
token.NEWLINE, tokenize.NL]:

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.