From 00fb7c42ee29b3cd27850851e9f2b6ef017fdc21 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 10 Jul 2020 08:16:51 +0200 Subject: [PATCH] Fix #274: String Types Py2 vs. Py3 compatibility This fixes problems between different string types. In Python2 str vs. unicode and in Python3 str vs. bytes. * Add some code from six project * Suppress two flake8 issues (false positives) * Update Changelog * Update CONTRIBUTORS * Document creating a version from a byte string Co-authored-by: Eli Bishop --- CHANGELOG.rst | 1 + CONTRIBUTORS | 1 + docs/usage.rst | 9 +++- semver.py | 59 ++++++++++++++++++++++-- setup.cfg | 1 + test_typeerror-274.py | 102 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 test_typeerror-274.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c79012bf..e5736f33 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -23,6 +23,7 @@ Bug Fixes --------- * :gh:`276` (:pr:`277`): VersionInfo.parse should be a class method +* :gh:`274` (:pr:`275`): Py2 vs. 
Py3 incompatibility TypeError Additions diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 4460512c..a4b61fa3 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -28,6 +28,7 @@ Significant contributors * Carles Barrobés * Craig Blaszczyk * Damien Nadé +* Eli Bishop * George Sakkis * Jan Pieter Waagmeester * Jelo Agnasin diff --git a/docs/usage.rst b/docs/usage.rst index 2f23e571..010f1c0b 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -45,10 +45,17 @@ creating a version: A :class:`semver.VersionInfo` instance can be created in different ways: -* From a string:: +* From a string (a Unicode string in Python 2):: >>> semver.VersionInfo.parse("3.4.5-pre.2+build.4") VersionInfo(major=3, minor=4, patch=5, prerelease='pre.2', build='build.4') + >>> semver.VersionInfo.parse(u"5.3.1") + VersionInfo(major=5, minor=3, patch=1, prerelease=None, build=None) + +* From a byte string:: + + >>> semver.VersionInfo.parse(b"2.3.4") + VersionInfo(major=2, minor=3, patch=4, prerelease=None, build=None) * From individual parts by a dictionary:: diff --git a/semver.py b/semver.py index f6e5bffc..4797d61d 100644 --- a/semver.py +++ b/semver.py @@ -10,6 +10,10 @@ import warnings +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 + + __version__ = "2.10.2" __author__ = "Kostiantyn Rybnikov" __author_email__ = "k-bx@k-bx.com" @@ -60,6 +64,53 @@ def cmp(a, b): return (a > b) - (a < b) +if PY3: # pragma: no cover + string_types = str, bytes + text_type = str + binary_type = bytes + + def b(s): + return s.encode("latin-1") + + def u(s): + return s + + +else: # pragma: no cover + string_types = unicode, str + text_type = unicode + binary_type = str + + def b(s): + return s + + # Workaround for standalone backslash + def u(s): + return unicode(s.replace(r"\\", r"\\\\"), "unicode_escape") + + +def ensure_str(s, encoding="utf-8", errors="strict"): + # Taken from six project + """ + Coerce *s* to `str`. 
+ + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> `str` + - `bytes` -> decoded to `str` + """ + if not isinstance(s, (text_type, binary_type)): + raise TypeError("not expecting type '%s'" % type(s)) + if PY2 and isinstance(s, text_type): + s = s.encode(encoding, errors) + elif PY3 and isinstance(s, binary_type): + s = s.decode(encoding, errors) + return s + + def deprecated(func=None, replace=None, version=None, category=DeprecationWarning): """ Decorates a function to output a deprecation warning. @@ -144,7 +195,7 @@ def comparator(operator): @wraps(operator) def wrapper(self, other): - comparable_types = (VersionInfo, dict, tuple, list, str) + comparable_types = (VersionInfo, dict, tuple, list, text_type, binary_type) if not isinstance(other, comparable_types): raise TypeError( "other type %r must be in %r" % (type(other), comparable_types) @@ -423,7 +474,7 @@ def compare(self, other): 0 """ cls = type(self) - if isinstance(other, str): + if isinstance(other, string_types): other = cls.parse(other) elif isinstance(other, dict): other = cls(**other) @@ -651,7 +702,7 @@ def parse(cls, version): VersionInfo(major=3, minor=4, patch=5, \ prerelease='pre.2', build='build.4') """ - match = cls._REGEX.match(version) + match = cls._REGEX.match(ensure_str(version)) if match is None: raise ValueError("%s is not valid SemVer string" % version) @@ -825,7 +876,7 @@ def max_ver(ver1, ver2): >>> semver.max_ver("1.0.0", "2.0.0") '2.0.0' """ - if isinstance(ver1, str): + if isinstance(ver1, string_types): ver1 = VersionInfo.parse(ver1) elif not isinstance(ver1, VersionInfo): raise TypeError() diff --git a/setup.cfg b/setup.cfg index 1cefc4bf..a22f03a3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,6 +13,7 @@ addopts = [flake8] max-line-length = 88 +ignore = F821,W503 exclude = .env, .eggs, diff --git a/test_typeerror-274.py b/test_typeerror-274.py new file mode 100644 index 00000000..2ed03d61 --- /dev/null +++ 
b/test_typeerror-274.py @@ -0,0 +1,102 @@ +import pytest +import sys + +import semver + + +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 + + +def ensure_binary(s, encoding="utf-8", errors="strict"): + """Coerce **s** to six.binary_type. + + For Python 2: + - `unicode` -> encoded to `str` + - `str` -> `str` + + For Python 3: + - `str` -> encoded to `bytes` + - `bytes` -> `bytes` + """ + if isinstance(s, semver.text_type): + return s.encode(encoding, errors) + elif isinstance(s, semver.binary_type): + return s + else: + raise TypeError("not expecting type '%s'" % type(s)) + + +def test_should_work_with_string_and_unicode(): + result = semver.compare(semver.u("1.1.0"), semver.b("1.2.2")) + assert result == -1 + result = semver.compare(semver.b("1.1.0"), semver.u("1.2.2")) + assert result == -1 + + +class TestEnsure: + # From six project + # grinning face emoji + UNICODE_EMOJI = semver.u("\U0001F600") + BINARY_EMOJI = b"\xf0\x9f\x98\x80" + + def test_ensure_binary_raise_type_error(self): + with pytest.raises(TypeError): + semver.ensure_str(8) + + def test_errors_and_encoding(self): + ensure_binary(self.UNICODE_EMOJI, encoding="latin-1", errors="ignore") + with pytest.raises(UnicodeEncodeError): + ensure_binary(self.UNICODE_EMOJI, encoding="latin-1", errors="strict") + + def test_ensure_binary_raise(self): + converted_unicode = ensure_binary( + self.UNICODE_EMOJI, encoding="utf-8", errors="strict" + ) + converted_binary = ensure_binary( + self.BINARY_EMOJI, encoding="utf-8", errors="strict" + ) + if semver.PY2: + # PY2: unicode -> str + assert converted_unicode == self.BINARY_EMOJI and isinstance( + converted_unicode, str + ) + # PY2: str -> str + assert converted_binary == self.BINARY_EMOJI and isinstance( + converted_binary, str + ) + else: + # PY3: str -> bytes + assert converted_unicode == self.BINARY_EMOJI and isinstance( + converted_unicode, bytes + ) + # PY3: bytes -> bytes + assert converted_binary == self.BINARY_EMOJI and isinstance( + 
converted_binary, bytes + ) + + def test_ensure_str(self): + converted_unicode = semver.ensure_str( + self.UNICODE_EMOJI, encoding="utf-8", errors="strict" + ) + converted_binary = semver.ensure_str( + self.BINARY_EMOJI, encoding="utf-8", errors="strict" + ) + if PY2: + # PY2: unicode -> str + assert converted_unicode == self.BINARY_EMOJI and isinstance( + converted_unicode, str + ) + # PY2: str -> str + assert converted_binary == self.BINARY_EMOJI and isinstance( + converted_binary, str + ) + else: + # PY3: str -> str + assert converted_unicode == self.UNICODE_EMOJI and isinstance( + converted_unicode, str + ) + # PY3: bytes -> str + assert converted_binary == self.UNICODE_EMOJI and isinstance( + converted_binary, str + )