From ad2faeeedc5d87876b9a8fcc51095758f9b25eb8 Mon Sep 17 00:00:00 2001 From: jonathan Date: Wed, 3 Oct 2018 20:56:20 -0500 Subject: [PATCH 1/5] bpo 6686: xml.sax.xmlreader.XMLReader.getProperty (xml.sax.handler.property_xml_string) returns bytes --- Lib/xml/sax/expatreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index 5066ffc2fa51f02..defc59ca93fa0b9 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -177,7 +177,7 @@ def getProperty(self, name): elif name == property_xml_string: if self._parser: if hasattr(self._parser, "GetInputContext"): - return self._parser.GetInputContext() + return self._parser.GetInputContext().decode() else: raise SAXNotRecognizedException( "This version of expat does not support getting" From 3c616f74271b50daf413f8cb9e5d40201b461375 Mon Sep 17 00:00:00 2001 From: Jonathan Gossage Date: Fri, 5 Oct 2018 05:25:43 -0500 Subject: [PATCH 2/5] Cleanup style. --- Lib/test/test_sax.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 3044960a0ed1654..88874de5a16928e 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -13,7 +13,8 @@ from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ XMLFilterBase, prepare_input_source from xml.sax.expatreader import create_parser -from xml.sax.handler import feature_namespaces, feature_external_ges +from xml.sax.handler import feature_namespaces, feature_external_ges,\ + property_xml_string from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl from io import BytesIO, StringIO import codecs @@ -1311,6 +1312,39 @@ def test_nsattrs_wattr(self): self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr") +# =========================================================================== +# +# Sax parser property tests +# +# 
=========================================================================== + +class PropertyContentHandler(ContentHandler): + def __init__(self, test_harness, reader, *args, **kwargs): + super().__init__(*args, **kwargs) + self.test_harness = test_harness + self.reader = reader + + def startElement(self, name, attr): # @UnusedVariable + property_ = self.reader.getProperty(property_xml_string) + self.test_harness.assertIsInstance(property_, str) + + +class SaxPropertyTest(unittest.TestCase): + def test_xml_str_str(self): + reader = create_parser() + reader.setContentHandler(PropertyContentHandler(self, reader)) + reader.feed('') + reader.feed('Hi there') + reader.close() + + def test_xml_str_bytes(self): + reader = create_parser() + reader.setContentHandler(PropertyContentHandler(self, reader)) + reader.feed(b'') + reader.feed(b'Hi there') + reader.close() + + def test_main(): run_unittest(MakeParserTest, ParseTest, @@ -1323,7 +1357,9 @@ def test_main(): StreamReaderWriterXmlgenTest, ExpatReaderTest, ErrorReportingTest, - XmlReaderTest) + XmlReaderTest, + SaxPropertyTest) + if __name__ == "__main__": test_main() From afd7b3fb7afd32fc4392efe60633d620e20ce528 Mon Sep 17 00:00:00 2001 From: Jonathan Gossage Date: Fri, 5 Oct 2018 15:58:38 -0500 Subject: [PATCH 3/5] bpo-6686 Upgrade tests to check input equal output. 
--- Lib/test/test_sax.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index 88874de5a16928e..ea20a8922a1624a 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -1316,33 +1316,44 @@ def test_nsattrs_wattr(self): # # Sax parser property tests # +# Currently only tests the condition reported in issue bpo-6686 +# # =========================================================================== -class PropertyContentHandler(ContentHandler): +class PropertyContentHandler(XMLGenerator): def __init__(self, test_harness, reader, *args, **kwargs): - super().__init__(*args, **kwargs) + test_harness.result = StringIO() + super().__init__(test_harness.result, *args, + encoding='UTF-8', **kwargs) self.test_harness = test_harness self.reader = reader - def startElement(self, name, attr): # @UnusedVariable + def startElement(self, name, attr): property_ = self.reader.getProperty(property_xml_string) self.test_harness.assertIsInstance(property_, str) + super().startElement(name, attr) class SaxPropertyTest(unittest.TestCase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.result = None + def test_xml_str_str(self): reader = create_parser() reader.setContentHandler(PropertyContentHandler(self, reader)) - reader.feed('') - reader.feed('Hi there') - reader.close() + input_\ + = '\nHi there' + reader.parse(StringIO(input_)) + self.assertEqual(input_, self.result.getvalue()) def test_xml_str_bytes(self): reader = create_parser() reader.setContentHandler(PropertyContentHandler(self, reader)) - reader.feed(b'') - reader.feed(b'Hi there') - reader.close() + input_\ + = b'\nHi there' + reader.parse(BytesIO(input_)) + self.assertEqual(input_.decode(), self.result.getvalue()) def test_main(): From b6ce633276abb4e6c0a1e3f71c79e80809e977a1 Mon Sep 17 00:00:00 2001 From: Jonathan Gossage Date: Mon, 15 Oct 2018 07:22:08 -0500 Subject: [PATCH 4/5] Upgrade test 
of property_xml_string to check all valid encodings --- Lib/test/test_sax.py | 64 ++++++++++++++++++++++++++++++-------------- Misc/ACKS | 1 + 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index ea20a8922a1624a..ab536ad13cee84a 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -1320,17 +1320,20 @@ def test_nsattrs_wattr(self): # # =========================================================================== -class PropertyContentHandler(XMLGenerator): - def __init__(self, test_harness, reader, *args, **kwargs): - test_harness.result = StringIO() - super().__init__(test_harness.result, *args, - encoding='UTF-8', **kwargs) +class PropertyContentHandler(ContentHandler): + def __init__(self, test_harness, reader, test, *args, **kwargs): + super().__init__(*args, **kwargs) self.test_harness = test_harness self.reader = reader + self.test_data = test def startElement(self, name, attr): property_ = self.reader.getProperty(property_xml_string) self.test_harness.assertIsInstance(property_, str) + if self.test_harness.test_data is not None: + self.test_harness\ + .assertEqual(property_, + self.test_data[1][1]) super().startElement(name, attr) @@ -1338,22 +1341,43 @@ class SaxPropertyTest(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.result = None - - def test_xml_str_str(self): - reader = create_parser() - reader.setContentHandler(PropertyContentHandler(self, reader)) - input_\ - = '\nHi there' - reader.parse(StringIO(input_)) - self.assertEqual(input_, self.result.getvalue()) - - def test_xml_str_bytes(self): + self.test_data = [['ascii', ['Hello']], + ['utf-8', ['abc˦']], + ['iso-8859-1', ['ghiéñ']], + ['utf-16', ['˦def']], + ['utf-16_be', ['jk˦l']], + ['utf_16_le', ['mno˦']]] + for t in self.test_data: + d = '{}\n'.format(t[1][0]) + t[1].append(d) + + def test_property_xml_string_from_bytes(self): + for prolog in (True, False): + for t in 
self.test_data: + reader = create_parser() + reader.setContentHandler(PropertyContentHandler(self, + reader, + t)) + source = InputSource() + data = b'' + if prolog: + data += b'\n' + data += t[1][1].encode(t[0]) + source.setByteStream(BytesIO(data)) + if not prolog: + source.setEncoding(t[0]) + reader.parse(source) + + def test_property_xml_str_from_str(self): + self.test_data = None reader = create_parser() - reader.setContentHandler(PropertyContentHandler(self, reader)) - input_\ - = b'\nHi there' - reader.parse(BytesIO(input_)) - self.assertEqual(input_.decode(), self.result.getvalue()) + reader.setContentHandler(PropertyContentHandler(self, + reader, + None)) + in_ = '\nHi there' + reader.parse(StringIO(in_)) def test_main(): diff --git a/Misc/ACKS b/Misc/ACKS index 272130f4e643ec3..fabdd40e44ef778 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -572,6 +572,7 @@ Chris Gonnerman Shelley Gooch David Goodger Elliot Gorokhovsky +Jonathan Gossage Hans de Graaff Tim Graham Kim Gräsman From c2bb9db5ffacd119b9606589f8e0a8e11c7b9ddf Mon Sep 17 00:00:00 2001 From: Jonathan Gossage Date: Wed, 17 Oct 2018 11:16:53 -0500 Subject: [PATCH 5/5] Handle all supported encodings. 
--- Lib/test/test_sax.py | 11 +++++------ Lib/xml/sax/expatreader.py | 8 +++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index ab536ad13cee84a..741ff9283113645 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -1329,10 +1329,11 @@ def __init__(self, test_harness, reader, test, *args, **kwargs): def startElement(self, name, attr): property_ = self.reader.getProperty(property_xml_string) - self.test_harness.assertIsInstance(property_, str) + self.test_harness.assertIsInstance(property_, bytes) if self.test_harness.test_data is not None: + prop = property_ self.test_harness\ - .assertEqual(property_, + .assertEqual(prop.decode(encoding=self.test_data[0]), self.test_data[1][1]) super().startElement(name, attr) @@ -1343,10 +1344,7 @@ def __init__(self, *args, **kwargs): self.result = None self.test_data = [['ascii', ['Hello']], ['utf-8', ['abc˦']], - ['iso-8859-1', ['ghiéñ']], - ['utf-16', ['˦def']], - ['utf-16_be', ['jk˦l']], - ['utf_16_le', ['mno˦']]] + ['iso-8859-1', ['ghiéñ']]] for t in self.test_data: d = '{}\n'.format(t[1][0]) t[1].append(d) @@ -1369,6 +1367,7 @@ def test_property_xml_string_from_bytes(self): if not prolog: source.setEncoding(t[0]) reader.parse(source) + pass def test_property_xml_str_from_str(self): self.test_data = None diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index defc59ca93fa0b9..48abf5d1d9528e0 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -3,8 +3,6 @@ pyexpat.__version__ == '2.22'. 
""" -version = "0.20" - from xml.sax._exceptions import * from xml.sax.handler import feature_validation, feature_namespaces from xml.sax.handler import feature_namespace_prefixes @@ -27,6 +25,8 @@ raise SAXReaderNotAvailable("expat not supported", None) from xml.sax import xmlreader, saxutils, handler +version = "0.20" + AttributesImpl = xmlreader.AttributesImpl AttributesNSImpl = xmlreader.AttributesNSImpl @@ -177,7 +177,7 @@ def getProperty(self, name): elif name == property_xml_string: if self._parser: if hasattr(self._parser, "GetInputContext"): - return self._parser.GetInputContext().decode() + return self._parser.GetInputContext() else: raise SAXNotRecognizedException( "This version of expat does not support getting" @@ -266,12 +266,14 @@ def _reset_lex_handler_prop(self): parser.EndCdataSectionHandler = None parser.StartDoctypeDeclHandler = None parser.EndDoctypeDeclHandler = None + parser.XmlDeclHandler = None else: parser.CommentHandler = lex.comment parser.StartCdataSectionHandler = lex.startCDATA parser.EndCdataSectionHandler = lex.endCDATA parser.StartDoctypeDeclHandler = self.start_doctype_decl parser.EndDoctypeDeclHandler = lex.endDTD + parser.XmlDeclHandler = lex.xml_decl_handler def reset(self): if self._namespaces: