Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit b289076

Browse files
authored
fix: guard imports against unsupported pyarrow versions (#934)
* fix: guard imports against unsupported pyarrow versions
* add unit tests
* fix pytype
* second try at fixing pytype
1 parent 10fee52 commit b289076
Copy full SHA for b289076

File tree

Expand file tree / Collapse file tree

9 files changed

+184
-39
lines changed
Open diff view settings
Filter options
Expand file tree / Collapse file tree

9 files changed

+184
-39
lines changed
Open diff view settings
Collapse file

‎google/cloud/bigquery/_helpers.py‎

Copy file name to clipboard
Expand all lines: google/cloud/bigquery/_helpers.py
+69 -3 (Lines changed: 69 additions & 3 deletions)
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
import decimal
2020
import math
2121
import re
22-
from typing import Union
22+
from typing import Any, Union
2323

2424
from google.cloud._helpers import UTC
2525
from google.cloud._helpers import _date_from_iso8601_date
@@ -29,7 +29,10 @@
2929
from google.cloud._helpers import _to_bytes
3030
import packaging.version
3131

32-
from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
32+
from google.cloud.bigquery.exceptions import (
33+
LegacyBigQueryStorageError,
34+
LegacyPyarrowError,
35+
)
3336

3437

3538
_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
@@ -42,6 +45,7 @@
4245
re.VERBOSE,
4346
)
4447

48+
_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
4549
_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
4650
_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
4751

@@ -95,12 +99,74 @@ def verify_version(self):
9599
if self.installed_version < _MIN_BQ_STORAGE_VERSION:
96100
msg = (
97101
"Dependency google-cloud-bigquery-storage is outdated, please upgrade "
98-
f"it to version >= 2.0.0 (version found: {self.installed_version})."
102+
f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})."
99103
)
100104
raise LegacyBigQueryStorageError(msg)
101105

102106

107+
class PyarrowVersions:
108+
"""Version comparisons for pyarrow package."""
109+
110+
def __init__(self):
111+
self._installed_version = None
112+
113+
@property
114+
def installed_version(self) -> packaging.version.Version:
115+
"""Return the parsed version of pyarrow."""
116+
if self._installed_version is None:
117+
import pyarrow
118+
119+
self._installed_version = packaging.version.parse(
120+
# Use 0.0.0, since it is earlier than any released version.
121+
# Legacy versions also have the same property, but
122+
# creating a LegacyVersion has been deprecated.
123+
# https://github.com/pypa/packaging/issues/321
124+
getattr(pyarrow, "__version__", "0.0.0")
125+
)
126+
127+
return self._installed_version
128+
129+
def try_import(self, raise_if_error: bool = False) -> Any:
130+
"""Verify that a recent enough version of pyarrow extra is
131+
installed.
132+
133+
The function assumes that pyarrow extra is installed, and should thus
134+
be used in places where this assumption holds.
135+
136+
Because `pip` can install an outdated version of this extra despite the
137+
constraints in `setup.py`, the calling code can use this helper to
138+
verify the version compatibility at runtime.
139+
140+
Returns:
141+
The ``pyarrow`` module or ``None``.
142+
143+
Raises:
144+
LegacyPyarrowError:
145+
If the pyarrow package is outdated and ``raise_if_error`` is ``True``.
146+
"""
147+
try:
148+
import pyarrow
149+
except ImportError as exc: # pragma: NO COVER
150+
if raise_if_error:
151+
raise LegacyPyarrowError(
152+
f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}."
153+
) from exc
154+
return None
155+
156+
if self.installed_version < _MIN_PYARROW_VERSION:
157+
if raise_if_error:
158+
msg = (
159+
"Dependency pyarrow is outdated, please upgrade "
160+
f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})."
161+
)
162+
raise LegacyPyarrowError(msg)
163+
return None
164+
165+
return pyarrow
166+
167+
103168
BQ_STORAGE_VERSIONS = BQStorageVersions()
169+
PYARROW_VERSIONS = PyarrowVersions()
104170

105171

106172
def _not_null(value, field):
Collapse file

‎google/cloud/bigquery/_pandas_helpers.py‎

Copy file name to clipboard
Expand all lines: google/cloud/bigquery/_pandas_helpers.py
+6 -13 (Lines changed: 6 additions & 13 deletions)
Original file line number | Diff line number | Diff line change
@@ -55,12 +55,6 @@ def _to_wkb(v):
5555

5656
_to_wkb = _to_wkb()
5757

58-
try:
59-
import pyarrow
60-
import pyarrow.parquet
61-
except ImportError: # pragma: NO COVER
62-
pyarrow = None
63-
6458
try:
6559
from google.cloud.bigquery_storage import ArrowSerializationOptions
6660
except ImportError:
@@ -73,12 +67,10 @@ def _to_wkb(v):
7367
from google.cloud.bigquery import schema
7468

7569

76-
_LOGGER = logging.getLogger(__name__)
70+
pyarrow = _helpers.PYARROW_VERSIONS.try_import()
7771

78-
_NO_BQSTORAGE_ERROR = (
79-
"The google-cloud-bigquery-storage library is not installed, "
80-
"please install google-cloud-bigquery-storage to use bqstorage features."
81-
)
72+
73+
_LOGGER = logging.getLogger(__name__)
8274

8375
_PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds.
8476

@@ -548,8 +540,9 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN
548540
serializing method. Defaults to "SNAPPY".
549541
https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table
550542
"""
551-
if pyarrow is None:
552-
raise ValueError("pyarrow is required for BigQuery schema conversion.")
543+
pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True)
544+
545+
import pyarrow.parquet
553546

554547
bq_schema = schema._to_schema_fields(bq_schema)
555548
arrow_table = dataframe_to_arrow(dataframe, bq_schema)
Collapse file

‎google/cloud/bigquery/exceptions.py‎

Copy file name to clipboard
Expand all lines: google/cloud/bigquery/exceptions.py
+4 (Lines changed: 4 additions & 0 deletions)
Original file line number | Diff line number | Diff line change
@@ -19,3 +19,7 @@ class BigQueryError(Exception):
1919

2020
class LegacyBigQueryStorageError(BigQueryError):
2121
"""Raised when too old a version of BigQuery Storage extra is detected at runtime."""
22+
23+
24+
class LegacyPyarrowError(BigQueryError):
25+
"""Raised when too old a version of pyarrow package is detected at runtime."""
Collapse file

‎noxfile.py‎

Copy file name to clipboard
Expand all lines: noxfile.py
+8 -1 (Lines changed: 8 additions & 1 deletion)
Original file line number | Diff line number | Diff line change
@@ -94,9 +94,16 @@ def unit(session):
9494
default(session)
9595

9696

97-
@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1])
97+
@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]])
9898
def unit_noextras(session):
9999
"""Run the unit test suite."""
100+
101+
# Install optional dependencies that are out-of-date.
102+
# https://github.com/googleapis/python-bigquery/issues/933
103+
# There is no pyarrow 1.0.0 package for Python 3.9.
104+
if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
105+
session.install("pyarrow==1.0.0")
106+
100107
default(session, install_extras=False)
101108

102109

Collapse file

‎testing/constraints-3.6.txt‎

Copy file name to clipboard
Expand all lines: testing/constraints-3.6.txt
+1 -1 (Lines changed: 1 addition & 1 deletion)
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,6 @@ proto-plus==1.10.0
1919
protobuf==3.12.0
2020
pyarrow==3.0.0
2121
requests==2.18.0
22-
shapely==1.6.0
22+
Shapely==1.6.0
2323
six==1.13.0
2424
tqdm==4.7.4
Collapse file

‎tests/unit/job/test_query_pandas.py‎

Copy file name to clipboard
Expand all lines: tests/unit/job/test_query_pandas.py
+4 -4 (Lines changed: 4 additions & 4 deletions)
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,6 @@
3131
import geopandas
3232
except (ImportError, AttributeError): # pragma: NO COVER
3333
geopandas = None
34-
try:
35-
import pyarrow
36-
except (ImportError, AttributeError): # pragma: NO COVER
37-
pyarrow = None
3834
try:
3935
from google.cloud import bigquery_storage
4036
except (ImportError, AttributeError): # pragma: NO COVER
@@ -44,11 +40,15 @@
4440
except (ImportError, AttributeError): # pragma: NO COVER
4541
tqdm = None
4642

43+
from google.cloud.bigquery import _helpers
4744
from .helpers import _make_client
4845
from .helpers import _make_connection
4946
from .helpers import _make_job_resource
5047

5148

49+
pyarrow = _helpers.PYARROW_VERSIONS.try_import()
50+
51+
5252
@pytest.fixture
5353
def table_read_options_kwarg():
5454
# Create a BigQuery Storage table read options object with pyarrow compression
Collapse file

‎tests/unit/test__helpers.py‎

Copy file name to clipboard
Expand all lines: tests/unit/test__helpers.py
+68 (Lines changed: 68 additions & 0 deletions)
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,20 @@
2424
except ImportError: # pragma: NO COVER
2525
bigquery_storage = None
2626

27+
try:
28+
import pyarrow
29+
except ImportError: # pragma: NO COVER
30+
pyarrow = None
31+
2732

2833
@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`")
2934
class TestBQStorageVersions(unittest.TestCase):
35+
def tearDown(self):
36+
from google.cloud.bigquery import _helpers
37+
38+
# Reset any cached versions since it may not match reality.
39+
_helpers.BQ_STORAGE_VERSIONS._installed_version = None
40+
3041
def _object_under_test(self):
3142
from google.cloud.bigquery import _helpers
3243

@@ -89,6 +100,63 @@ def test_is_read_session_optional_false(self):
89100
assert not versions.is_read_session_optional
90101

91102

103+
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
104+
class TestPyarrowVersions(unittest.TestCase):
105+
def tearDown(self):
106+
from google.cloud.bigquery import _helpers
107+
108+
# Reset any cached versions since it may not match reality.
109+
_helpers.PYARROW_VERSIONS._installed_version = None
110+
111+
def _object_under_test(self):
112+
from google.cloud.bigquery import _helpers
113+
114+
return _helpers.PyarrowVersions()
115+
116+
def _call_try_import(self, **kwargs):
117+
from google.cloud.bigquery import _helpers
118+
119+
_helpers.PYARROW_VERSIONS._installed_version = None
120+
return _helpers.PYARROW_VERSIONS.try_import(**kwargs)
121+
122+
def test_try_import_raises_no_error_w_recent_pyarrow(self):
123+
from google.cloud.bigquery.exceptions import LegacyPyarrowError
124+
125+
with mock.patch("pyarrow.__version__", new="5.0.0"):
126+
try:
127+
pyarrow = self._call_try_import(raise_if_error=True)
128+
self.assertIsNotNone(pyarrow)
129+
except LegacyPyarrowError: # pragma: NO COVER
130+
self.fail("Legacy error raised with a non-legacy dependency version.")
131+
132+
def test_try_import_returns_none_w_legacy_pyarrow(self):
133+
with mock.patch("pyarrow.__version__", new="2.0.0"):
134+
pyarrow = self._call_try_import()
135+
self.assertIsNone(pyarrow)
136+
137+
def test_try_import_raises_error_w_legacy_pyarrow(self):
138+
from google.cloud.bigquery.exceptions import LegacyPyarrowError
139+
140+
with mock.patch("pyarrow.__version__", new="2.0.0"):
141+
with self.assertRaises(LegacyPyarrowError):
142+
self._call_try_import(raise_if_error=True)
143+
144+
def test_installed_version_returns_cached(self):
145+
versions = self._object_under_test()
146+
versions._installed_version = object()
147+
assert versions.installed_version is versions._installed_version
148+
149+
def test_installed_version_returns_parsed_version(self):
150+
versions = self._object_under_test()
151+
152+
with mock.patch("pyarrow.__version__", new="1.2.3"):
153+
version = versions.installed_version
154+
155+
assert version.major == 1
156+
assert version.minor == 2
157+
assert version.micro == 3
158+
159+
92160
class Test_not_null(unittest.TestCase):
93161
def _call_fut(self, value, field):
94162
from google.cloud.bigquery._helpers import _not_null
Collapse file

‎tests/unit/test__pandas_helpers.py‎

Copy file name to clipboard
Expand all lines: tests/unit/test__pandas_helpers.py
+18 -11 (Lines changed: 18 additions & 11 deletions)
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,6 @@
2929
import pandas.testing
3030
except ImportError: # pragma: NO COVER
3131
pandas = None
32-
try:
33-
import pyarrow
34-
import pyarrow.types
35-
except ImportError: # pragma: NO COVER
36-
# Mock out pyarrow when missing, because methods from pyarrow.types are
37-
# used in test parameterization.
38-
pyarrow = mock.Mock()
3932
try:
4033
import geopandas
4134
except ImportError: # pragma: NO COVER
@@ -44,9 +37,19 @@
4437
import pytest
4538

4639
from google import api_core
40+
from google.cloud.bigquery import exceptions
4741
from google.cloud.bigquery import _helpers
4842
from google.cloud.bigquery import schema
4943

44+
45+
pyarrow = _helpers.PYARROW_VERSIONS.try_import()
46+
if pyarrow:
47+
import pyarrow.types
48+
else: # pragma: NO COVER
49+
# Mock out pyarrow when missing, because methods from pyarrow.types are
50+
# used in test parameterization.
51+
pyarrow = mock.Mock()
52+
5053
try:
5154
from google.cloud import bigquery_storage
5255

@@ -1120,15 +1123,19 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
11201123

11211124
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
11221125
def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch):
1123-
monkeypatch.setattr(module_under_test, "pyarrow", None)
1124-
with pytest.raises(ValueError) as exc_context:
1126+
mock_pyarrow_import = mock.Mock()
1127+
mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError(
1128+
"pyarrow not installed"
1129+
)
1130+
monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import)
1131+
1132+
with pytest.raises(exceptions.LegacyPyarrowError):
11251133
module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None)
1126-
assert "pyarrow is required" in str(exc_context.value)
11271134

11281135

11291136
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
11301137
@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
1131-
def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch):
1138+
def test_dataframe_to_parquet_w_extra_fields(module_under_test):
11321139
with pytest.raises(ValueError) as exc_context:
11331140
module_under_test.dataframe_to_parquet(
11341141
pandas.DataFrame(), (schema.SchemaField("not_in_df", "STRING"),), None
Collapse file

‎tests/unit/test_table.py‎

Copy file name to clipboard
Expand all lines: tests/unit/test_table.py
+6 -6 (Lines changed: 6 additions & 6 deletions)
Original file line number | Diff line number | Diff line change
@@ -45,18 +45,18 @@
4545
except (ImportError, AttributeError): # pragma: NO COVER
4646
geopandas = None
4747

48-
try:
49-
import pyarrow
50-
import pyarrow.types
51-
except ImportError: # pragma: NO COVER
52-
pyarrow = None
53-
5448
try:
5549
from tqdm import tqdm
5650
except (ImportError, AttributeError): # pragma: NO COVER
5751
tqdm = None
5852

5953
from google.cloud.bigquery.dataset import DatasetReference
54+
from google.cloud.bigquery import _helpers
55+
56+
57+
pyarrow = _helpers.PYARROW_VERSIONS.try_import()
58+
if pyarrow:
59+
import pyarrow.types
6060

6161

6262
def _mock_client():

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.