python · vstinner · Oct 14, 2020 · Sep 12, 2020 · Sep 12, 2020 · Sep 12, 2020
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
@@ -61,7 +61,8 @@ def normalize_encoding(encoding):
        if c.isalnum() or c == '.':
            if punct and chars:
                chars.append('_')
-            chars.append(c)
+            if c.isascii():
+                chars.append(c)
            punct = False
        else:
            punct = True

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -3440,5 +3440,22 @@ def search_function(encoding):
        self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))


+class EncodingNormalizationTest(unittest.TestCase):
+
+    def test_bpo39337(self):
+        """
+        bpo-39337: similar to _Py_normalize_encoding(),
+        encodings.normalize_encoding() should ignore non-ASCII letters.
+        """
+        import encodings
+
+        out = encodings.normalize_encoding('utf\xE9\u20AC\U0010ffff-8')
+        self.assertEqual(out, 'utf_8')
+        out = encodings.normalize_encoding('utf_8')
+        self.assertEqual(out, 'utf_8')
+        out = encodings.normalize_encoding('utf   8')
+        self.assertEqual(out, 'utf_8')
+
+
 if __name__ == "__main__":
    unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
@@ -0,0 +1 @@
+:func:`encodings.normalize_encoding` now ignores non-ASCII alphanumeric characters.
-Original file line number
+Diff line change
@@ -61,7 +61,8 @@ def normalize_encoding(encoding):
             if c.isalnum() or c == '.':
                 if punct and chars:
                     chars.append('_')
-                chars.append(c)
+                if c.isascii():
+                    chars.append(c)
             Copy link

  
      
    
  

  
      

  
  Member


      

  

  
    
      

      
            vstinner
  

      

      

      


        Oct 11, 2020


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      I wanted to ask you to add a ".. versionchanged:: 3.10" entry in the documentation, but then I noticed that the encodings module was never documented! Oh!
    
  
  


    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
  


          
          
        
      
    

    



    
        
  
    
        
    
  


      
          
  
      
            Copy link

  
      
    
  

  
      

  
  Member


      

  Author


  

  
    
      

      
            shihai1991
  

      

      

      


        Oct 12, 2020


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      If end users are expected to use this function or module, I can try to write the documentation, but I will need some time to do it :)
    
  
  


    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
  


          
          
        
      
    

    



    
        
  
    
        
    
  


      
          
  
      
            Copy link

  
      
    
  

  
      

  
  Member


      

  

  
    
      

      
            vstinner
  

      

      

      


        Oct 12, 2020


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      It can and must be addressed in a separate PR. The lack of documentation should not hold up this change.
    
  
  


    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
  


          
          
        
      
    

    



    
        
  
    
        
    
  


      
          
  
      
            Copy link

  
      
    
  

  
      

  
  Member


      

  Author


  

  
    
      

      
            shihai1991
  

      

      

      


        Oct 12, 2020


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      ok, copy that.
    
  
  


    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
                 punct = False
             else:
                 punct = True
-          Expand Down
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		:func:`encodings.normalize_encoding` now ignores non-ASCII alphanumeric characters.
shihai1991 marked this conversation as resolved. Show resolved Hide resolved