Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 5fd840e

Browse filesBrowse files
feat(bigquery): unit and system tests for dataframe with int column with NaN values (#39)
* feat(bigquery): add unit and system tests for int columns
* feat(bigquery): cosmetic changes
* feat(bigquery): use pkg_resources for comparison
* feat(bigquery): nit
1 parent 18eb9e8 commit 5fd840e
Copy full SHA for 5fd840e

File tree

Expand file treeCollapse file tree

2 files changed

+160
-0
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

2 files changed

+160
-0
lines changed
Open diff view settings
Collapse file

‎tests/system.py‎

Copy file name to clipboardExpand all lines: tests/system.py
+64Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import psutil
3232
import pytest
3333
import pytz
34+
import pkg_resources
3435

3536
try:
3637
from google.cloud import bigquery_storage_v1beta1
@@ -125,6 +126,9 @@
125126
(TooManyRequests, InternalServerError, ServiceUnavailable)
126127
)
127128

129+
# Oldest pandas release the version-gated tests in this module accept.
PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0")

# ``get_distribution`` raises ``DistributionNotFound`` when the package is not
# installed.  Evaluated unguarded at import time, that would abort the whole
# test module, even though the tests that need pandas already skip themselves
# via ``pandas is None``.  Fall back to a version lower than the minimum so the
# ``PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION`` comparison stays valid.
try:
    PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
except pkg_resources.DistributionNotFound:
    PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0")
131+
128132

129133
def _has_rows(result):
130134
return len(result) > 0
@@ -742,6 +746,66 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
742746
)
743747
self.assertEqual(table.num_rows, 3)
744748

749+
@unittest.skipIf(
    pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION,
    "Only `pandas version >=1.0.0` is supported",
)
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
    """Load an ``Int64`` column holding a missing value into an existing table.

    The destination table is created up front with a single NULLABLE INTEGER
    column ``x``; the dataframe is then loaded without passing a job config,
    and the resulting schema and row count are checked.

    https://github.com/googleapis/python-bigquery/issues/22
    """
    client = Config.CLIENT

    dataset_id = _make_dataset_id("bq_load_test")
    self.temp_dataset(dataset_id)
    table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format(
        client.project, dataset_id
    )

    # Create the destination table first, with an explicit NULLABLE INTEGER column.
    nullable_int_column = bigquery.SchemaField("x", "INTEGER", mode="NULLABLE")
    create_table = retry_403(client.create_table)
    created_table = create_table(Table(table_id, schema=(nullable_int_column,)))
    self.to_delete.insert(0, created_table)

    # One nullable-integer column; the third value is missing.
    columns = collections.OrderedDict()
    columns["x"] = pandas.Series([1, 2, None, 4], dtype="Int64")
    dataframe = pandas.DataFrame(columns, columns=columns.keys())

    client.load_table_from_dataframe(dataframe, table_id).result()

    loaded_table = client.get_table(table_id)
    expected_schema = (bigquery.SchemaField("x", "INTEGER"),)
    self.assertEqual(tuple(loaded_table.schema), expected_schema)
    self.assertEqual(loaded_table.num_rows, 4)
781+
782+
@unittest.skipIf(
    pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION,
    "Only `pandas version >=1.0.0` is supported",
)
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self):
    """Load an ``Int64`` column holding a missing value with no table created first.

    Unlike the explicit-schema variant, no table is created and no schema is
    passed before the load; the test then checks that the resulting table has
    a single INTEGER column ``x`` and four rows.

    https://github.com/googleapis/python-bigquery/issues/22
    """
    client = Config.CLIENT

    dataset_id = _make_dataset_id("bq_load_test")
    self.temp_dataset(dataset_id)
    # NOTE(review): the table name matches the explicit-schema test's name;
    # presumably the per-test dataset id keeps the two apart -- confirm.
    table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format(
        client.project, dataset_id
    )

    # One nullable-integer column; the third value is missing.
    columns = collections.OrderedDict()
    columns["x"] = pandas.Series([1, 2, None, 4], dtype="Int64")
    dataframe = pandas.DataFrame(columns, columns=columns.keys())

    client.load_table_from_dataframe(dataframe, table_id).result()

    loaded_table = client.get_table(table_id)
    expected_schema = (bigquery.SchemaField("x", "INTEGER"),)
    self.assertEqual(tuple(loaded_table.schema), expected_schema)
    self.assertEqual(loaded_table.num_rows, 4)
808+
745809
@unittest.skipIf(pandas is None, "Requires `pandas`")
746810
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
747811
def test_load_table_from_dataframe_w_nulls(self):
Collapse file

‎tests/unit/test_client.py‎

Copy file name to clipboardExpand all lines: tests/unit/test_client.py
+96Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from six.moves import http_client
3131
import pytest
3232
import pytz
33+
import pkg_resources
3334

3435
try:
3536
import fastparquet
@@ -56,6 +57,9 @@
5657
bigquery_storage_v1beta1 = None
5758
from tests.unit.helpers import make_connection
5859

60+
# Oldest pandas release the version-gated tests in this module accept.
PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0")
# Backward-compatible alias: the constant was originally misspelled, and the
# skip decorators in this module still reference the misspelled name.
PANDAS_MINIUM_VERSION = PANDAS_MINIMUM_VERSION

# ``get_distribution`` raises ``DistributionNotFound`` when the package is not
# installed.  Evaluated unguarded at import time, that would abort the whole
# test module, even though the tests that need pandas already skip themselves
# via ``pandas is None``.  Fall back to a version lower than the minimum so the
# ``PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION`` comparison stays valid.
try:
    PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
except pkg_resources.DistributionNotFound:
    PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0")
62+
5963

6064
def _make_credentials():
6165
import google.auth.credentials
@@ -6973,6 +6977,98 @@ def test_load_table_from_dataframe_no_schema_warning_wo_pyarrow(self):
69736977
]
69746978
assert matches, "A missing schema deprecation warning was not raised."
69756979

6980+
@unittest.skipIf(
    pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION,
    "Only `pandas version >=1.0.0` supported",
)
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
    """Check the load request built for an ``Int64`` dataframe when the table exists.

    ``Client.get_table`` is patched to return a table whose schema is a single
    NULLABLE INT64 column ``x``.  The test asserts the arguments passed to the
    patched ``Client.load_table_from_file`` and that the job config sent uses
    the PARQUET source format with that same schema.
    """
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery import job
    from google.cloud.bigquery.schema import SchemaField

    client = self._make_client()
    dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64")

    existing_table = mock.Mock(schema=[SchemaField("x", "INT64", "NULLABLE")])
    load_patch = mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    )
    get_table_patch = mock.patch(
        "google.cloud.bigquery.client.Client.get_table",
        autospec=True,
        return_value=existing_table,
    )

    with load_patch as load_table_from_file:
        with get_table_patch:
            client.load_table_from_dataframe(
                dataframe, self.TABLE_REF, location=self.LOCATION
            )

    expected_kwargs = dict(
        num_retries=_DEFAULT_NUM_RETRIES,
        rewind=True,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=self.LOCATION,
        project=None,
        job_config=mock.ANY,
    )
    load_table_from_file.assert_called_once_with(
        client, mock.ANY, self.TABLE_REF, **expected_kwargs
    )

    # A recorded call unpacks as (name, args, kwargs); only kwargs are needed.
    _, _, sent_kwargs = load_table_from_file.mock_calls[0]
    sent_config = sent_kwargs["job_config"]
    expected_schema = (SchemaField("x", "INT64", "NULLABLE", None),)
    assert sent_config.source_format == job.SourceFormat.PARQUET
    assert tuple(sent_config.schema) == expected_schema
7025+
7026+
@unittest.skipIf(
    pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION,
    "Only `pandas version >=1.0.0` supported",
)
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self):
    """Check the load request built for an ``Int64`` dataframe when the table is missing.

    ``Client.get_table`` is patched to raise ``NotFound``.  The test asserts
    the arguments passed to the patched ``Client.load_table_from_file`` and
    that the job config sent still uses the PARQUET source format with a
    single NULLABLE INT64 column ``x``.
    """
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery import job
    from google.cloud.bigquery.schema import SchemaField

    client = self._make_client()
    dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64")

    table_missing = google.api_core.exceptions.NotFound("Table not found")
    load_patch = mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    )
    get_table_patch = mock.patch(
        "google.cloud.bigquery.client.Client.get_table",
        autospec=True,
        side_effect=table_missing,
    )

    with load_patch as load_table_from_file:
        with get_table_patch:
            client.load_table_from_dataframe(
                dataframe, self.TABLE_REF, location=self.LOCATION
            )

    expected_kwargs = dict(
        num_retries=_DEFAULT_NUM_RETRIES,
        rewind=True,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=self.LOCATION,
        project=None,
        job_config=mock.ANY,
    )
    load_table_from_file.assert_called_once_with(
        client, mock.ANY, self.TABLE_REF, **expected_kwargs
    )

    # A recorded call unpacks as (name, args, kwargs); only kwargs are needed.
    _, _, sent_kwargs = load_table_from_file.mock_calls[0]
    sent_config = sent_kwargs["job_config"]
    expected_schema = (SchemaField("x", "INT64", "NULLABLE", None),)
    assert sent_config.source_format == job.SourceFormat.PARQUET
    assert tuple(sent_config.schema) == expected_schema
7071+
69767072
@unittest.skipIf(pandas is None, "Requires `pandas`")
69777073
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
69787074
def test_load_table_from_dataframe_struct_fields_error(self):

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.