diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 55a7cb62..3f7634f2 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,6 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:631b4a35a4f9dd5e97740a97c4c117646eb85b35e103844dc49d152bd18694cd -# created: 2025-02-05T14:40:56.685429494Z - + digest: sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf +# created: 2025-02-21T19:32:52.01306189Z diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 08171cbd..d41b45aa 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -15,11 +15,13 @@ set -eo pipefail +CURRENT_DIR=$(dirname "${BASH_SOURCE[0]}") + if [[ -z "${PROJECT_ROOT:-}" ]]; then - PROJECT_ROOT="github/python-bigquery-pandas" + PROJECT_ROOT=$(realpath "${CURRENT_DIR}/..") fi -cd "${PROJECT_ROOT}" +pushd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 @@ -28,10 +30,16 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Setup service account credentials. -export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json +if [[ -f "${KOKORO_GFILE_DIR}/service-account.json" ]] +then + export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json +fi # Setup project id. -export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") +if [[ -f "${KOKORO_GFILE_DIR}/project-id.json" ]] +then + export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") +fi # If this is a continuous build, send the test log to the FlakyBot. # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. @@ -46,7 +54,7 @@ fi # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. 
if [[ -n "${NOX_SESSION:-}" ]]; then - python3 -m nox -s ${NOX_SESSION:-} + python3 -m nox -s ${NOX_SESSION:-} else - python3 -m nox + python3 -m nox fi diff --git a/.readthedocs.yml b/.readthedocs.yml index 7ae21e16..86e0f0fb 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -16,3 +16,9 @@ build: python: install: - requirements: docs/requirements-docs.txt + +# Explicit configuration path is required by ReadtheDocs starting Jan 20, 2025. +# See: https://about.readthedocs.com/blog/2024/12/deprecate-config-files-without-sphinx-or-mkdocs-config/ +version: 2 +sphinx: + configuration: docs/conf.py diff --git a/CHANGELOG.md b/CHANGELOG.md index f628cddd..a860458b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.28.0](https://github.com/googleapis/python-bigquery-pandas/compare/v0.27.0...v0.28.0) (2025-02-24) + + +### Features + +* Add bigquery_client as a parameter for read_gbq and to_gbq ([#878](https://github.com/googleapis/python-bigquery-pandas/issues/878)) ([d42a562](https://github.com/googleapis/python-bigquery-pandas/commit/d42a56200fe2f356240c7956da4c201e872be4d5)) + ## [0.27.0](https://github.com/googleapis/python-bigquery-pandas/compare/v0.26.1...v0.27.0) (2025-02-05) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index feffd858..bd3afb97 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -269,6 +269,7 @@ def __init__( client_secret=None, user_agent=None, rfc9110_delimiter=False, + bigquery_client=None, ): global context from google.api_core.exceptions import ClientError, GoogleAPIError @@ -288,6 +289,14 @@ def __init__( self.client_secret = client_secret self.user_agent = user_agent self.rfc9110_delimiter = rfc9110_delimiter + self.use_bqstorage_api = use_bqstorage_api + + if bigquery_client is not None: + # If a bq client is already provided, use it to populate auth fields. 
+ self.project_id = bigquery_client.project + self.credentials = bigquery_client._credentials + self.client = bigquery_client + return default_project = None @@ -325,8 +334,9 @@ def __init__( if context.project is None: context.project = self.project_id - self.client = self.get_client() - self.use_bqstorage_api = use_bqstorage_api + self.client = _get_client( + self.user_agent, self.rfc9110_delimiter, self.project_id, self.credentials + ) def _start_timer(self): self.start = time.time() @@ -702,6 +712,7 @@ def read_gbq( client_secret=None, *, col_order=None, + bigquery_client=None, ): r"""Read data from Google BigQuery to a pandas DataFrame. @@ -849,6 +860,9 @@ def read_gbq( the user is attempting to connect to. col_order : list(str), optional Alias for columns, retained for backwards compatibility. + bigquery_client : google.cloud.bigquery.Client, optional + A Google Cloud BigQuery Python Client instance. If provided, it will be used for reading + data, while the project and credentials parameters will be ignored. Returns ------- @@ -900,6 +914,7 @@ def read_gbq( auth_redirect_uri=auth_redirect_uri, client_id=client_id, client_secret=client_secret, + bigquery_client=bigquery_client, ) if _is_query(query_or_table): @@ -971,6 +986,7 @@ def to_gbq( client_secret=None, user_agent=None, rfc9110_delimiter=False, + bigquery_client=None, ): """Write a DataFrame to a Google BigQuery table. @@ -1087,6 +1103,9 @@ def to_gbq( rfc9110_delimiter : bool Sets user agent delimiter to a hyphen or a slash. Default is False, meaning a hyphen will be used. + bigquery_client : google.cloud.bigquery.Client, optional + A Google Cloud BigQuery Python Client instance. If provided, it will be used for writing + data, while the project, user_agent, and credentials parameters will be ignored. ..
versionadded:: 0.23.3 """ @@ -1157,6 +1176,7 @@ def to_gbq( client_secret=client_secret, user_agent=user_agent, rfc9110_delimiter=rfc9110_delimiter, + bigquery_client=bigquery_client, ) bqclient = connector.client @@ -1492,3 +1512,22 @@ def create_user_agent( user_agent = f"{user_agent} {identity}" return user_agent + + +def _get_client(user_agent, rfc9110_delimiter, project_id, credentials): + import google.api_core.client_info + + bigquery = FEATURES.bigquery_try_import() + + user_agent = create_user_agent( + user_agent=user_agent, rfc9110_delimiter=rfc9110_delimiter + ) + + client_info = google.api_core.client_info.ClientInfo( + user_agent=user_agent, + ) + return bigquery.Client( + project=project_id, + credentials=credentials, + client_info=client_info, + ) diff --git a/pandas_gbq/version.py b/pandas_gbq/version.py index e9325b39..a6070398 100644 --- a/pandas_gbq/version.py +++ b/pandas_gbq/version.py @@ -2,4 +2,4 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. 
-__version__ = "0.27.0" +__version__ = "0.28.0" diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 8c45167f..cb8aadb9 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -54,6 +54,13 @@ def to_gbq(credentials, project_id): ) +@pytest.fixture +def to_gbq_with_bq_client(bigquery_client): + import pandas_gbq + + return functools.partial(pandas_gbq.to_gbq, bigquery_client=bigquery_client) + + @pytest.fixture def read_gbq(credentials, project_id): import pandas_gbq @@ -63,6 +70,13 @@ def read_gbq(credentials, project_id): ) +@pytest.fixture +def read_gbq_with_bq_client(bigquery_client): + import pandas_gbq + + return functools.partial(pandas_gbq.read_gbq, bigquery_client=bigquery_client) + + @pytest.fixture() def random_dataset_id(bigquery_client: bigquery.Client, project_id: str): dataset_id = prefixer.create_prefix() diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index b62f3590..1457ec30 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -1398,3 +1398,13 @@ def test_to_gbq_does_not_override_mode(gbq_table, gbq_connector): ) assert verify_schema(gbq_connector, gbq_table.dataset_id, table_id, table_schema) + + +def test_gbqconnector_init_with_bq_client(bigquery_client): + gbq_connector = gbq.GbqConnector( + project_id="project_id", credentials=None, bigquery_client=bigquery_client + ) + + assert gbq_connector.project_id == bigquery_client.project + assert gbq_connector.credentials is bigquery_client._credentials + assert gbq_connector.client is bigquery_client diff --git a/tests/system/test_read_gbq.py b/tests/system/test_read_gbq.py index 4ae96a36..72cb6b66 100644 --- a/tests/system/test_read_gbq.py +++ b/tests/system/test_read_gbq.py @@ -659,3 +659,14 @@ def test_dml_query(read_gbq, writable_table: str): """ result = read_gbq(query) assert result is not None + + +def test_read_gbq_with_bq_client(read_gbq_with_bq_client): + query = "SELECT * FROM UNNEST([1, 2, 3]) AS numbers" + + 
actual_result = read_gbq_with_bq_client(query) + + expected_result = pandas.DataFrame( + {"numbers": pandas.Series([1, 2, 3], dtype="Int64")} + ) + pandas.testing.assert_frame_equal(actual_result, expected_result) diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py index 139f072b..ad7c58ec 100644 --- a/tests/system/test_to_gbq.py +++ b/tests/system/test_to_gbq.py @@ -615,3 +615,17 @@ def test_dataframe_round_trip_with_table_schema( pandas.testing.assert_frame_equal( expected_df.set_index("row_num").sort_index(), round_trip ) + + +def test_dataframe_round_trip_with_bq_client( + to_gbq_with_bq_client, read_gbq_with_bq_client, random_dataset_id +): + table_id = ( + f"{random_dataset_id}.round_trip_w_bq_client_{random.randrange(1_000_000)}" + ) + df = pandas.DataFrame({"numbers": pandas.Series([1, 2, 3], dtype="Int64")}) + + to_gbq_with_bq_client(df, table_id) + result = read_gbq_with_bq_client(table_id) + + pandas.testing.assert_frame_equal(result, df)