From 4e618e560bfea1ca3b7f17b4fc1f8f438fa6c77e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 27 Mar 2025 19:00:31 +0100 Subject: [PATCH 01/17] chore(deps): update all dependencies (#2143) * chore(deps): update all dependencies * pin ipython===8.18.1 for python 3.9 --------- Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 6 ++--- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 4 +-- samples/geography/requirements.txt | 34 ++++++++++++------------ samples/magics/requirements-test.txt | 6 ++--- samples/magics/requirements.txt | 8 +++--- samples/notebooks/requirements-test.txt | 6 ++--- samples/notebooks/requirements.txt | 13 ++++----- samples/snippets/requirements-test.txt | 6 ++--- samples/snippets/requirements.txt | 2 +- 10 files changed, 44 insertions(+), 43 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index cf215e2fd..c8290d33f 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 165800741..fa349e0d3 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.27.0 +google-cloud-bigquery==3.31.0 google-auth-oauthlib==1.2.1 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 4ad1bd028..5d20a4554 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==8.3.4 -mock==5.1.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 0ad2154a4..3fa11ce7c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,41 +1,41 @@ -attrs==24.3.0 -certifi==2024.12.14 +attrs==25.3.0 +certifi==2025.1.31 cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 -db-dtypes==1.3.1 +db-dtypes==1.4.2 Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 -google-api-core==2.24.0 -google-auth==2.37.0 -google-cloud-bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 -google-cloud-core==2.4.1 -google-crc32c==1.6.0 +google-api-core==2.24.2 +google-auth==2.38.0 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +google-cloud-core==2.4.3 +google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.66.0 -grpcio==1.69.0 +googleapis-common-protos==1.69.2 +grpcio==1.71.0 idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 packaging==24.2 pandas==2.2.3 -proto-plus==1.25.0 -pyarrow==18.1.0 +proto-plus==1.26.1 +pyarrow==19.0.1 pyasn1==0.6.1 pyasn1-modules==0.4.1 pycparser==2.22 -pyparsing==3.2.1 +pyparsing==3.2.3 python-dateutil==2.9.0.post0 -pytz==2024.2 +pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 rsa==4.9 -Shapely==2.0.6 +Shapely==2.0.7 six==1.17.0 -typing-extensions==4.12.2 +typing-extensions==4.13.0 typing-inspect==0.9.0 urllib3==2.3.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index cf215e2fd..c8290d33f 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff 
--git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 4b81fe0ad..3ab215951 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.5.0 -db-dtypes==1.3.1 -google.cloud.bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 +bigquery_magics==0.9.0 +db-dtypes==1.4.2 +google.cloud.bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index cf215e2fd..c8290d33f 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index e92d084a4..ca5505a2e 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,8 +1,9 @@ -bigquery-magics==0.5.0 -db-dtypes==1.3.1 -google-cloud-bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 -ipython==8.18.1 +bigquery-magics==0.9.0 +db-dtypes==1.4.2 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +ipython===8.18.1; python_version == '3.9' +ipython==9.0.2; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.0; python_version >= '3.10' +matplotlib==3.10.1; python_version >= '3.10' pandas==2.2.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 52ccc8ab2..197b89187 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,4 +1,4 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 307ebac24..4b88c6b70 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.27.0 +google-cloud-bigquery==3.31.0 From c526822ce781d5c24e37703507d74fd785a5fe29 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 1 Apr 2025 00:40:19 +0200 Subject: [PATCH 02/17] chore(deps): update dependency pyasn1-modules to v0.4.2 (#2150) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3fa11ce7c..514e19d2c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -26,7 +26,7 @@ pandas==2.2.3 proto-plus==1.26.1 pyarrow==19.0.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 +pyasn1-modules==0.4.2 pycparser==2.22 pyparsing==3.2.3 python-dateutil==2.9.0.post0 From 77d71736fcc006d3ab8f8ba17955ad5f06e21876 Mon Sep 17 00:00:00 2001 From: yokomotod Date: Wed, 2 Apr 2025 05:16:41 +0900 Subject: [PATCH 03/17] fix: empty record dtypes (#2147) * fix: empty record dtypes * update pandas minimum version * fix coverage * fix test_pandas --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/table.py | 42 +++++++++++++++------------------- pyproject.toml | 2 +- testing/constraints-3.9.txt | 2 +- tests/system/test_pandas.py | 7 +----- tests/unit/test_table.py | 10 ++------ 5 files changed, 23 insertions(+), 40 deletions(-) diff --git a/google/cloud/bigquery/table.py 
b/google/cloud/bigquery/table.py index 238ff6beb..099f7fd69 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2648,31 +2648,25 @@ def to_dataframe( if pyarrow.types.is_timestamp(col.type) ) - if len(record_batch) > 0: - df = record_batch.to_pandas( + df = record_batch.to_pandas( + date_as_object=date_as_object, + timestamp_as_object=timestamp_as_object, + integer_object_nulls=True, + types_mapper=_pandas_helpers.default_types_mapper( date_as_object=date_as_object, - timestamp_as_object=timestamp_as_object, - integer_object_nulls=True, - types_mapper=_pandas_helpers.default_types_mapper( - date_as_object=date_as_object, - bool_dtype=bool_dtype, - int_dtype=int_dtype, - float_dtype=float_dtype, - string_dtype=string_dtype, - date_dtype=date_dtype, - datetime_dtype=datetime_dtype, - time_dtype=time_dtype, - timestamp_dtype=timestamp_dtype, - range_date_dtype=range_date_dtype, - range_datetime_dtype=range_datetime_dtype, - range_timestamp_dtype=range_timestamp_dtype, - ), - ) - else: - # Avoid "ValueError: need at least one array to concatenate" on - # older versions of pandas when converting empty RecordBatch to - # DataFrame. See: https://github.com/pandas-dev/pandas/issues/41241 - df = pandas.DataFrame([], columns=record_batch.schema.names) + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, + date_dtype=date_dtype, + datetime_dtype=datetime_dtype, + time_dtype=time_dtype, + timestamp_dtype=timestamp_dtype, + range_date_dtype=range_date_dtype, + range_datetime_dtype=range_datetime_dtype, + range_timestamp_dtype=range_timestamp_dtype, + ), + ) for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False) diff --git a/pyproject.toml b/pyproject.toml index 17bf4fd20..38d74cdd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ bqstorage = [ "pyarrow >= 4.0.0", ] pandas = [ - "pandas >= 1.1.4", + "pandas >= 1.3.0", "pandas-gbq >= 0.26.1", "grpcio >= 1.47.0, < 2.0.0", "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 63b5d8bf6..cb6c29f3b 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -21,7 +21,7 @@ opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 packaging==24.2.0 -pandas==1.1.4 +pandas==1.3.0 pandas-gbq==0.26.1 proto-plus==1.22.3 protobuf==3.20.2 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 01f552435..1fe7ff2cd 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1222,12 +1222,7 @@ def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype( # These pandas dtypes are handled by the custom dtypes. assert df.dtypes["bool_col"].name == "boolean" - # Result is dependent upon which version of pandas is being used. - # Float64 was not introduced until pandas version 1.4. 
- if PANDAS_INSTALLED_VERSION >= "1.4": - assert df.dtypes["float64_col"].name == "Float64" - else: - assert df.dtypes["float64_col"].name == "string" + assert df.dtypes["float64_col"].name == "Float64" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["string_col"].name == "string" diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index b846036ab..3588cfba6 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -4143,14 +4143,8 @@ def test_to_dataframe_w_dtypes_mapper(self): ) self.assertEqual(df.name.dtype.name, "string") - # While pyproject.toml lists pandas 1.1 as the lowest supported version of - # pandas, the pip resolver is not able to resolve pandas 1.1 and numpy - if hasattr(pandas, "Float64Dtype"): - self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) - self.assertEqual(df.miles.dtype.name, "Float64") - else: - self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) - self.assertEqual(df.miles.dtype.name, "string") + self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) + self.assertEqual(df.miles.dtype.name, "Float64") if hasattr(pandas, "ArrowDtype"): self.assertEqual( From c2343dd4a55cfe90bf450547eba45945e6d2ede6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 3 Apr 2025 19:50:49 +0200 Subject: [PATCH 04/17] chore(deps): update dependency shapely to v2.1.0 (#2155) * chore(deps): update dependency shapely to v2.1.0 * pin Shapely===2.0.7 for python 3.9 --------- Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 514e19d2c..5fe9005cc 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -34,7 +34,8 @@ pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 rsa==4.9 -Shapely==2.0.7 +Shapely===2.0.7; python_version == '3.9' +Shapely==2.1.0; python_version >= '3.10' six==1.17.0 typing-extensions==4.13.0 typing-inspect==0.9.0 From e89a707b162182ededbf94cc9a0f7594bc2be475 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 3 Apr 2025 11:45:08 -0700 Subject: [PATCH 05/17] fix: table iterator should not use bqstorage when page_size is not None (#2154) * fix: table iterator should not use bqstorage when page_size is not None * fix dbapi cursor tests --- google/cloud/bigquery/table.py | 11 +++++++++-- tests/unit/test_dbapi_cursor.py | 1 + tests/unit/test_table.py | 7 +++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 099f7fd69..8a3b6151a 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1873,6 +1873,11 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed + @property + def page_size(self) -> Optional[int]: + """The maximum number of rows in each page of results from this request, if present.""" + return self._page_size + def _is_almost_completely_cached(self): """Check if all results are completely cached. @@ -1924,7 +1929,7 @@ def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): if self._is_almost_completely_cached(): return False - if self.max_results is not None: + if self.max_results is not None or self.page_size is not None: return False try: @@ -1994,7 +1999,9 @@ def _maybe_warn_max_results( bqstorage_client: The BigQuery Storage client intended to use for downloading result rows. 
""" - if bqstorage_client is not None and self.max_results is not None: + if bqstorage_client is not None and ( + self.max_results is not None or self.page_size is not None + ): warnings.warn( "Cannot use bqstorage_client if max_results is set, " "reverting to fetching data with the REST endpoint.", diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 6fca4cec0..cba9030de 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -161,6 +161,7 @@ def _mock_rows( mock_rows, ) mock_rows.max_results = None + mock_rows.page_size = None type(mock_rows).job_id = mock.PropertyMock(return_value="test-job-id") type(mock_rows).location = mock.PropertyMock(return_value="test-location") type(mock_rows).num_dml_affected_rows = mock.PropertyMock( diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3588cfba6..a9966f1ce 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2693,6 +2693,13 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) + def test__should_use_bqstorage_returns_false_if_page_size_set(self): + iterator = self._make_one(page_size=10, first_page_response=None) # not cached + result = iterator._should_use_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertFalse(result) + def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached From ae632c5a88546d7c60c7780af7baa4f4c5e4e5a4 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 3 Apr 2025 23:20:01 +0200 Subject: [PATCH 06/17] chore(deps): update dependency typing-extensions to v4.13.1 (#2156) Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5fe9005cc..37bcdf687 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -37,6 +37,6 @@ rsa==4.9 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.0; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.0 +typing-extensions==4.13.1 typing-inspect==0.9.0 urllib3==2.3.0 From 22b80bba9d0bed319fd3102e567906c9b458dd02 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 10 Apr 2025 10:13:17 -0700 Subject: [PATCH 07/17] feat: add preview support for incremental results (#2145) * feat: add preview support for incremental results Plumbs support to enable incremental results. 
* fastpath allow * add fastquery test * lint * lint * blacken --- google/cloud/bigquery/_job_helpers.py | 1 + google/cloud/bigquery/job/query.py | 15 +++++++++++++++ tests/unit/job/test_query_config.py | 5 +++++ tests/unit/test__job_helpers.py | 12 ++++++++++++ 4 files changed, 33 insertions(+) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index a8373c356..9193f8184 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -564,6 +564,7 @@ def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: "maximumBytesBilled", "requestId", "createSession", + "writeIncrementalResults", } unsupported_keys = request_keys - keys_allowlist diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index a27c10530..f14039bc0 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -674,6 +674,21 @@ def write_disposition(self): def write_disposition(self, value): self._set_sub_prop("writeDisposition", value) + @property + def write_incremental_results(self) -> Optional[bool]: + """This is only supported for a SELECT query using a temporary table. + + If set, the query is allowed to write results incrementally to the temporary result + table. This may incur a performance penalty. This option cannot be used with Legacy SQL. + + This feature is not generally available. + """ + return self._get_sub_prop("writeIncrementalResults") + + @write_incremental_results.setter + def write_incremental_results(self, value): + self._set_sub_prop("writeIncrementalResults", value) + @property def table_definitions(self): """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py index 7818236f4..e0878d067 100644 --- a/tests/unit/job/test_query_config.py +++ b/tests/unit/job/test_query_config.py @@ -167,6 +167,11 @@ def test_connection_properties(self): self.assertEqual(config.connection_properties[1].key, "time_zone") self.assertEqual(config.connection_properties[1].value, "America/Chicago") + def test_incremental_results(self): + config = self._get_target_class()() + config.write_incremental_results = True + self.assertEqual(config.write_incremental_results, True) + def test_create_session(self): config = self._get_target_class()() self.assertIsNone(config.create_session) diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 96914d9f9..4fa093c69 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -194,6 +194,13 @@ def make_query_response( make_query_request({"maximumBytesBilled": "987654"}), id="job_config-with-maximum_bytes_billed", ), + pytest.param( + job_query.QueryJobConfig( + write_incremental_results=True, + ), + make_query_request({"writeIncrementalResults": True}), + id="job_config-with-incremental-results", + ), ), ) def test__to_query_request(job_config, expected): @@ -1141,6 +1148,11 @@ def test_make_job_id_w_job_id_overrides_prefix(): False, id="priority=BATCH", ), + pytest.param( + job_query.QueryJobConfig(write_incremental_results=True), + True, + id="write_incremental_results", + ), ), ) def test_supported_by_jobs_query_from_queryjobconfig( From b162288eb3be5a8bd23b05070eae52fe6c813b1b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 10 Apr 2025 10:37:54 -0700 Subject: [PATCH 08/17] chore(python): remove .gitignore from templates 
(#2160) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): remove .gitignore from templates Source-Link: https://github.com/googleapis/synthtool/commit/419d94cdddd0d859ac6743ffebd177693c8a027f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove replacement in owlbot.py --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- owlbot.py | 8 -------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 9d743afe8..51b21a62b 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5581906b957284864632cde4e9c51d1cc66b0094990b27e689132fe5cd036046 -# created: 2025-03-07 + digest: sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb +# created: 2025-04-10T17:00:10.042601326Z diff --git a/owlbot.py b/owlbot.py index fceeaa1b6..8cfa2b097 100644 --- a/owlbot.py +++ b/owlbot.py @@ -130,14 +130,6 @@ 'ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]', ) - -# ---------------------------------------------------------------------------- -# pytype-related changes -# ---------------------------------------------------------------------------- - -# Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") - s.shell.run(["nox", "-s", "blacken"], hide_output=False) for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) From 1cabacbcec17a14d80e62627129cdf26696acabe Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 11 Apr 2025 15:23:01 -0400 Subject: [PATCH 09/17] test: adds pytest-xdist to speed up processing of CI/CD checks (#2153) * experimentation using pytest-xdist * adds pytest-xdist to nox system session for experimentation * adds pytest-xdist install AND -n=auto argument * updates sample noxfiles * updates pytest version in requirements-test.txt files * Update samples/notebooks/requirements-test.txt * Update samples/notebooks/requirements-test.txt --- noxfile.py | 23 ++++++++++++++++++++--- samples/desktopapp/requirements-test.txt | 1 + samples/geography/requirements-test.txt | 1 + samples/magics/requirements-test.txt | 1 + samples/notebooks/requirements-test.txt | 1 + samples/snippets/requirements-test.txt | 1 + 6 files changed, 25 insertions(+), 3 deletions(-) diff --git a/noxfile.py b/noxfile.py index 1b118836b..c2b4bbb50 100644 --- a/noxfile.py +++ b/noxfile.py @@ -98,6 +98,7 @@ def default(session, install_extras=True): "pytest", "google-cloud-testutils", "pytest-cov", + "pytest-xdist", "freezegun", "-c", constraints_path, @@ -129,6 +130,7 @@ def default(session, install_extras=True): # Run py.test against the unit tests. session.run( "py.test", + "-n=auto", "--quiet", "-W default::PendingDeprecationWarning", "--cov=google/cloud/bigquery", @@ -224,7 +226,12 @@ def system(session): # Install all test dependencies, then install local packages in place. 
session.install( - "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path + "pytest", + "psutil", + "pytest-xdist", + "google-cloud-testutils", + "-c", + constraints_path, ) if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true": # mTLS test requires pyopenssl and latest google-cloud-storage @@ -257,6 +264,7 @@ def system(session): # Run py.test against the system tests. session.run( "py.test", + "-n=auto", "--quiet", "-W default::PendingDeprecationWarning", os.path.join("tests", "system"), @@ -310,7 +318,9 @@ def snippets(session): ) # Install all test dependencies, then install local packages in place. - session.install("pytest", "google-cloud-testutils", "-c", constraints_path) + session.install( + "pytest", "pytest-xdist", "google-cloud-testutils", "-c", constraints_path + ) session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) @@ -326,9 +336,12 @@ def snippets(session): # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session # using the nox config from that directory. - session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs) + session.run( + "py.test", "-n=auto", os.path.join("docs", "snippets.py"), *session.posargs + ) session.run( "py.test", + "-n=auto", "samples", "-W default::PendingDeprecationWarning", "--ignore=samples/desktopapp", @@ -393,6 +406,7 @@ def prerelease_deps(session): "google-cloud-testutils", "psutil", "pytest", + "pytest-xdist", "pytest-cov", ) @@ -439,18 +453,21 @@ def prerelease_deps(session): # Run all tests, except a few samples tests which require extra dependencies. session.run( "py.test", + "-n=auto", "tests/unit", "-W default::PendingDeprecationWarning", ) session.run( "py.test", + "-n=auto", "tests/system", "-W default::PendingDeprecationWarning", ) session.run( "py.test", + "-n=auto", "samples/tests", "-W default::PendingDeprecationWarning", ) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index c8290d33f..183230cf4 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 5d20a4554..7b01ce8ac 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,3 @@ pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index c8290d33f..183230cf4 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index c8290d33f..183230cf4 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 197b89187..0cf0bb6b4 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -2,3 +2,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 From a69d6b796d2edb6ba453980c9553bc9b206c5a6e 
Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 16 Apr 2025 05:20:30 -0400 Subject: [PATCH 10/17] feat: adds condition class and assoc. unit tests (#2159) * feat: adds condition class and assoc. unit tests * Updates two test cases for empty string --- google/cloud/bigquery/dataset.py | 93 ++++++++++++++++++- tests/unit/test_dataset.py | 155 +++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 15a11fb40..cc14598fe 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -19,6 +19,7 @@ import copy import typing +from typing import Optional, List, Dict, Any, Union import google.cloud._helpers # type: ignore @@ -29,8 +30,6 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery import external_config -from typing import Optional, List, Dict, Any, Union - def _get_table_reference(self, table_id: str) -> TableReference: """Constructs a TableReference. @@ -1074,3 +1073,93 @@ def reference(self): model = _get_model_reference routine = _get_routine_reference + + +class Condition(object): + """Represents a textual expression in the Common Expression Language (CEL) syntax. + + Typically used for filtering or policy rules, such as in IAM Conditions + or BigQuery row/column access policies. + + See: + https://cloud.google.com/iam/docs/reference/rest/Shared.Types/Expr + https://github.com/google/cel-spec + + Args: + expression (str): + The condition expression string using CEL syntax. This is required. + Example: ``resource.type == "compute.googleapis.com/Instance"`` + title (Optional[str]): + An optional title for the condition, providing a short summary. + Example: ``"Request is for a GCE instance"`` + description (Optional[str]): + An optional description of the condition, providing a detailed explanation. 
+ Example: ``"This condition checks whether the resource is a GCE instance."`` + """ + + def __init__( + self, + expression: str, + title: Optional[str] = None, + description: Optional[str] = None, + ): + self._properties: Dict[str, Any] = {} + # Use setters to initialize properties, which also handle validation + self.expression = expression + self.title = title + self.description = description + + @property + def title(self) -> Optional[str]: + """Optional[str]: The title for the condition.""" + return self._properties.get("title") + + @title.setter + def title(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("Pass a string for title, or None") + self._properties["title"] = value + + @property + def description(self) -> Optional[str]: + """Optional[str]: The description for the condition.""" + return self._properties.get("description") + + @description.setter + def description(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("Pass a string for description, or None") + self._properties["description"] = value + + @property + def expression(self) -> str: + """str: The expression string for the condition.""" + + # Cast assumes expression is always set due to __init__ validation + return typing.cast(str, self._properties.get("expression")) + + @expression.setter + def expression(self, value: str): + if not isinstance(value, str): + raise ValueError("Pass a non-empty string for expression") + if not value: + raise ValueError("expression cannot be an empty string") + self._properties["expression"] = value + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this Condition.""" + return self._properties + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": + """Factory: construct a Condition instance given its API representation.""" + + # Ensure required fields are present in the resource if necessary + if "expression" not in resource: + raise ValueError("API representation missing required 'expression' field.") + + return cls( + expression=resource["expression"], + title=resource.get("title"), + description=resource.get("description"), + ) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 8ab8dffec..036e22458 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -19,6 +19,7 @@ import pytest from google.cloud.bigquery.dataset import ( AccessEntry, + Condition, Dataset, DatasetReference, Table, @@ -1228,3 +1229,157 @@ def test_table(self): self.assertEqual(table.table_id, "table_id") self.assertEqual(table.dataset_id, dataset_id) self.assertEqual(table.project, project) + + +class TestCondition: + EXPRESSION = 'resource.name.startsWith("projects/my-project/instances/")' + TITLE = "Instance Access" + DESCRIPTION = "Access to instances in my-project" + + @pytest.fixture + def condition_instance(self): + """Provides a Condition instance for tests.""" + return Condition( + expression=self.EXPRESSION, + title=self.TITLE, + description=self.DESCRIPTION, + ) + + @pytest.fixture + def condition_api_repr(self): + """Provides the API representation for the test Condition.""" + return { + "expression": self.EXPRESSION, + "title": self.TITLE, + "description": self.DESCRIPTION, + } + + # --- Basic Functionality Tests --- + + def test_constructor_and_getters_full(self, condition_instance): + """Test initialization with all arguments and subsequent attribute access.""" + assert 
condition_instance.expression == self.EXPRESSION + assert condition_instance.title == self.TITLE + assert condition_instance.description == self.DESCRIPTION + + def test_constructor_and_getters_minimal(self): + """Test initialization with only the required expression.""" + condition = Condition(expression=self.EXPRESSION) + assert condition.expression == self.EXPRESSION + assert condition.title is None + assert condition.description is None + + def test_setters(self, condition_instance): + """Test setting attributes after initialization.""" + new_title = "New Title" + new_desc = "New Description" + new_expr = "request.time < timestamp('2024-01-01T00:00:00Z')" + + condition_instance.title = new_title + assert condition_instance.title == new_title + + condition_instance.description = new_desc + assert condition_instance.description == new_desc + + condition_instance.expression = new_expr + assert condition_instance.expression == new_expr + + # Test setting title and description to empty strings + condition_instance.title = "" + assert condition_instance.title == "" + + condition_instance.description = "" + assert condition_instance.description == "" + + # Test setting optional fields back to None + condition_instance.title = None + assert condition_instance.title is None + condition_instance.description = None + assert condition_instance.description is None + + # --- API Representation Tests --- + + def test_to_api_repr_full(self, condition_instance, condition_api_repr): + """Test converting a fully populated Condition to API representation.""" + api_repr = condition_instance.to_api_repr() + assert api_repr == condition_api_repr + + def test_to_api_repr_minimal(self): + """Test converting a minimally populated Condition to API representation.""" + condition = Condition(expression=self.EXPRESSION) + expected_api_repr = { + "expression": self.EXPRESSION, + "title": None, + "description": None, + } + api_repr = condition.to_api_repr() + assert api_repr == expected_api_repr + + def test_from_api_repr_full(self, condition_api_repr): + """Test creating a Condition from a full API representation.""" + condition = Condition.from_api_repr(condition_api_repr) + assert condition.expression == self.EXPRESSION + assert condition.title == self.TITLE + assert condition.description == self.DESCRIPTION + + def test_from_api_repr_minimal(self): + """Test creating a Condition from a minimal API representation.""" + minimal_repr = {"expression": self.EXPRESSION} + condition = Condition.from_api_repr(minimal_repr) + assert condition.expression == self.EXPRESSION + assert condition.title is None + assert condition.description is None + + def test_from_api_repr_with_extra_fields(self): + """Test creating a Condition from an API repr with unexpected fields.""" + api_repr = { + "expression": self.EXPRESSION, + "title": self.TITLE, + "unexpected_field": "some_value", + } + condition = Condition.from_api_repr(api_repr) + assert condition.expression == self.EXPRESSION + assert condition.title == self.TITLE + assert condition.description is None + # Check that the extra field didn't get added to internal properties + assert "unexpected_field" not in condition._properties + + # # --- Validation Tests --- + + @pytest.mark.parametrize( + "kwargs, error_msg", + [ + ({"expression": None}, "Pass a non-empty string for expression"), # type: ignore + ({"expression": ""}, "expression cannot be an empty string"), + ({"expression": 123}, "Pass a non-empty string for expression"), # type: ignore + ({"expression": EXPRESSION, 
"title": 123}, "Pass a string for title, or None"), # type: ignore + ({"expression": EXPRESSION, "description": False}, "Pass a string for description, or None"), # type: ignore + ], + ) + def test_validation_init(self, kwargs, error_msg): + """Test validation during __init__.""" + with pytest.raises(ValueError, match=error_msg): + Condition(**kwargs) + + @pytest.mark.parametrize( + "attribute, value, error_msg", + [ + ("expression", None, "Pass a non-empty string for expression"), # type: ignore + ("expression", "", "expression cannot be an empty string"), + ("expression", 123, "Pass a non-empty string for expression"), # type: ignore + ("title", 123, "Pass a string for title, or None"), # type: ignore + ("description", [], "Pass a string for description, or None"), # type: ignore + ], + ) + def test_validation_setters(self, condition_instance, attribute, value, error_msg): + """Test validation via setters.""" + with pytest.raises(ValueError, match=error_msg): + setattr(condition_instance, attribute, value) + + def test_validation_expression_required_from_api(self): + """Test ValueError is raised if expression is missing in from_api_repr.""" + api_repr = {"title": self.TITLE} + with pytest.raises( + ValueError, match="API representation missing required 'expression' field." + ): + Condition.from_api_repr(api_repr) From ca1798aaee2d5905fe688d3097f8ee5c989da333 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 24 Apr 2025 15:46:59 -0500 Subject: [PATCH 11/17] fix: `query()` now warns when `job_id` is set and the default `job_retry` is ignored (#2167) * fix: `query()` now warns when `job_id` is set and the default `job_retry` is ignored * Update google/cloud/bigquery/client.py * allow None for job_retry in code path that calls jobs.query from client.query * allow None for job_retry in code path that calls jobs.query from client.query * Update tests/unit/test_job_retry.py --- google/cloud/bigquery/_job_helpers.py | 42 ++++++++++++++++++++++++++- google/cloud/bigquery/client.py | 15 ++-------- tests/unit/test_job_retry.py | 18 ++++++++---- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 9193f8184..4a884ada5 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -39,7 +39,9 @@ import functools import os import uuid +import textwrap from typing import Any, Dict, Optional, TYPE_CHECKING, Union +import warnings import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries @@ -198,6 +200,44 @@ def _validate_job_config(request_body: Dict[str, Any], invalid_key: str): raise ValueError(f"got unexpected key {repr(invalid_key)} in job_config") +def validate_job_retry(job_id: Optional[str], job_retry: Optional[retries.Retry]): + """Catch common mistakes, such as setting a job_id and job_retry at the same + time. + """ + if job_id is not None and job_retry is not None: + # TODO(tswast): To avoid breaking changes but still allow a default + # query job retry, we currently only raise if they explicitly set a + # job_retry other than the default. In a future version, we may want to + # avoid this check for DEFAULT_JOB_RETRY and always raise. + if job_retry is not google.cloud.bigquery.retry.DEFAULT_JOB_RETRY: + raise TypeError( + textwrap.dedent( + """ + `job_retry` was provided, but the returned job is + not retryable, because a custom `job_id` was + provided. 
To customize the job ID and allow for job + retries, set job_id_prefix, instead. + """ + ).strip() + ) + else: + warnings.warn( + textwrap.dedent( + """ + job_retry must be explicitly set to None if job_id is set. + BigQuery cannot retry a failed job by using the exact + same ID. Setting job_id without explicitly disabling + job_retry will raise an error in the future. To avoid this + warning, either use job_id_prefix instead (preferred) or + set job_retry=None. + """ + ).strip(), + category=FutureWarning, + # user code -> client.query / client.query_and_wait -> validate_job_retry + stacklevel=3, + ) + + def _to_query_request( job_config: Optional[job.QueryJobConfig] = None, *, @@ -308,7 +348,7 @@ def query_jobs_query( project: str, retry: retries.Retry, timeout: Optional[float], - job_retry: retries.Retry, + job_retry: Optional[retries.Retry], ) -> job.QueryJob: """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED. diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8bbdd6c32..e7cafc47e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3388,7 +3388,7 @@ def query( project: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - job_retry: retries.Retry = DEFAULT_JOB_RETRY, + job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT, ) -> job.QueryJob: """Run a SQL query. @@ -3455,18 +3455,9 @@ def query( class, or if both ``job_id`` and non-``None`` non-default ``job_retry`` are provided. """ - job_id_given = job_id is not None - if ( - job_id_given - and job_retry is not None - and job_retry is not DEFAULT_JOB_RETRY - ): - raise TypeError( - "`job_retry` was provided, but the returned job is" - " not retryable, because a custom `job_id` was" - " provided." - ) + _job_helpers.validate_job_retry(job_id, job_retry) + job_id_given = job_id is not None if job_id_given and api_method == enums.QueryApiMethod.QUERY: raise TypeError( "`job_id` was provided, but the 'QUERY' `api_method` was requested." diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 958986052..7144c640b 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -511,26 +511,34 @@ def api_request(method, path, query_params=None, data=None, **kw): def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): with pytest.raises( TypeError, - match=re.escape( + match=( "`job_retry` was provided, but the returned job is" " not retryable, because a custom `job_id` was" " provided." - ), + ).replace(" ", r"\s"), ): client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): client._connection = make_connection({}) - job = client.query("select 42", job_id=42) + + with pytest.warns( + FutureWarning, + match=re.escape("job_retry must be explicitly set to None if job_id is set."), + ): + # Implicitly providing a job_retry is a warning and will be an error in the future. + job = client.query("select 42", job_id=42) + with pytest.raises( TypeError, - match=re.escape( + match=( "`job_retry` was provided, but this job is" " not retryable, because a custom `job_id` was" " provided to the query that created this job." - ), + ).replace(" ", r"\s"), ): + # Explicitly providing a job_retry is an error. 
job.result(job_retry=google.api_core.retry.Retry()) From a1c8e9aaf60986924868d54a0ab0334e77002a39 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 25 Apr 2025 10:29:54 -0700 Subject: [PATCH 12/17] feat: support BigLakeConfiguration (managed Iceberg tables) (#2162) * feat: support BigLakeConfiguration (managed Iceberg tables) This PR adds the BigLakeConfiguration class to tables, and the necessary property mappings from Table. It also adds some utility enums (BigLakeFileFormat, BigLakeTableFormat) to more easily communicate available values for configuration. --- google/cloud/bigquery/enums.py | 16 ++++ google/cloud/bigquery/table.py | 150 +++++++++++++++++++++++++++++++ tests/unit/test_table.py | 160 +++++++++++++++++++++++++++++++++ 3 files changed, 326 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 5519bc989..b32fc8200 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -387,3 +387,19 @@ def _generate_next_value_(name, start, count, last_values): ROUNDING_MODE_UNSPECIFIED = enum.auto() ROUND_HALF_AWAY_FROM_ZERO = enum.auto() ROUND_HALF_EVEN = enum.auto() + + +class BigLakeFileFormat(object): + FILE_FORMAT_UNSPECIFIED = "FILE_FORMAT_UNSPECIFIED" + """The default unspecified value.""" + + PARQUET = "PARQUET" + """Apache Parquet format.""" + + +class BigLakeTableFormat(object): + TABLE_FORMAT_UNSPECIFIED = "TABLE_FORMAT_UNSPECIFIED" + """The default unspecified value.""" + + ICEBERG = "ICEBERG" + """Apache Iceberg format.""" diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 8a3b6151a..503ca4e71 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -380,6 +380,7 @@ class Table(_TableBase): _PROPERTY_TO_API_FIELD: Dict[str, Any] = { **_TableBase._PROPERTY_TO_API_FIELD, + "biglake_configuration": "biglakeConfiguration", "clustering_fields": "clustering", "created": "creationTime", "description": "description", @@ -431,6 +432,29 @@ def __init__(self, table_ref, schema=None) -> None: reference = property(_reference_getter) + @property + def biglake_configuration(self): + """google.cloud.bigquery.table.BigLakeConfiguration: Configuration + for managed tables for Apache Iceberg. + + See https://cloud.google.com/bigquery/docs/iceberg-tables for more information. + """ + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["biglake_configuration"] + ) + if prop is not None: + prop = BigLakeConfiguration.from_api_repr(prop) + return prop + + @biglake_configuration.setter + def biglake_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._properties[ + self._PROPERTY_TO_API_FIELD["biglake_configuration"] + ] = api_repr + @property def require_partition_filter(self): """bool: If set to true, queries over the partitioned table require a @@ -3501,6 +3525,132 @@ def to_api_repr(self) -> Dict[str, Any]: return resource + +class BigLakeConfiguration(object): + """Configuration for managed tables for Apache Iceberg, formerly + known as BigLake. + + Args: + connection_id (Optional[str]): + The connection specifying the credentials to be used to read and write to external + storage, such as Cloud Storage. The connection_id can have the form + ``{project}.{location}.{connection_id}`` or + ``projects/{project}/locations/{location}/connections/{connection_id}``. + storage_uri (Optional[str]): + The fully qualified location prefix of the external folder where table data is + stored.
The '*' wildcard character is not allowed. The URI should be in the + format ``gs://bucket/path_to_table/``. + file_format (Optional[str]): + The file format the table data is stored in. See BigLakeFileFormat for available + values. + table_format (Optional[str]): + The table format the metadata only snapshots are stored in. See BigLakeTableFormat + for available values. + _properties (Optional[dict]): + Private. Used to construct object from API resource. + """ + + def __init__( + self, + connection_id: Optional[str] = None, + storage_uri: Optional[str] = None, + file_format: Optional[str] = None, + table_format: Optional[str] = None, + _properties: Optional[dict] = None, + ) -> None: + if _properties is None: + _properties = {} + self._properties = _properties + if connection_id is not None: + self.connection_id = connection_id + if storage_uri is not None: + self.storage_uri = storage_uri + if file_format is not None: + self.file_format = file_format + if table_format is not None: + self.table_format = table_format + + @property + def connection_id(self) -> Optional[str]: + """str: The connection specifying the credentials to be used to read and write to external + storage, such as Cloud Storage.""" + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value: Optional[str]): + self._properties["connectionId"] = value + + @property + def storage_uri(self) -> Optional[str]: + """str: The fully qualified location prefix of the external folder where table data is + stored.""" + return self._properties.get("storageUri") + + @storage_uri.setter + def storage_uri(self, value: Optional[str]): + self._properties["storageUri"] = value + + @property + def file_format(self) -> Optional[str]: + """str: The file format the table data is stored in. See BigLakeFileFormat for available + values.""" + return self._properties.get("fileFormat") + + @file_format.setter + def file_format(self, value: Optional[str]): + self._properties["fileFormat"] = value + + @property + def table_format(self) -> Optional[str]: + """str: The table format the metadata only snapshots are stored in. See BigLakeTableFormat + for available values.""" + return self._properties.get("tableFormat") + + @table_format.setter + def table_format(self, value: Optional[str]): + self._properties["tableFormat"] = value + + def _key(self): + return tuple(sorted(self._properties.items())) + + def __eq__(self, other): + if not isinstance(other, BigLakeConfiguration): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + key_vals = ["{}={}".format(key, val) for key, val in self._key()] + return "BigLakeConfiguration({})".format(",".join(key_vals)) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "BigLakeConfiguration": + """Factory: construct a BigLakeConfiguration given its API representation. + + Args: + resource: + BigLakeConfiguration representation returned from the API + + Returns: + BigLakeConfiguration parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this BigLakeConfiguration. + + Returns: + BigLakeConfiguration represented as an API resource. + """ + return copy.deepcopy(self._properties) + + def _item_to_row(iterator, resource): """Convert a JSON row to the native object. 
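For illustration, a minimal sketch of creating a managed Iceberg table with the new configuration class, assuming the project, dataset, connection, and bucket below exist (all names are hypothetical):

    from google.cloud import bigquery
    from google.cloud.bigquery.enums import BigLakeFileFormat, BigLakeTableFormat
    from google.cloud.bigquery.table import BigLakeConfiguration

    client = bigquery.Client()
    table = bigquery.Table(
        "my-project.my_dataset.iceberg_table",
        schema=[bigquery.SchemaField("name", "STRING")],
    )
    # Point the managed table at external storage via a BigLake connection.
    table.biglake_configuration = BigLakeConfiguration(
        connection_id="my-project.us.my-connection",  # hypothetical connection
        storage_uri="gs://my-bucket/iceberg_table/",  # hypothetical bucket path
        file_format=BigLakeFileFormat.PARQUET,
        table_format=BigLakeTableFormat.ICEBERG,
    )
    table = client.create_table(table)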
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a9966f1ce..253006547 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -435,6 +435,12 @@ def _make_resource(self): "sourceFormat": "CSV", "csvOptions": {"allowJaggedRows": True, "encoding": "encoding"}, }, + "biglakeConfiguration": { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + }, "labels": {"x": "y"}, } @@ -521,6 +527,15 @@ def _verifyResourceProperties(self, table, resource): else: self.assertIsNone(table.encryption_configuration) + if "biglakeConfiguration" in resource: + self.assertIsNotNone(table.biglake_configuration) + self.assertEqual(table.biglake_configuration.connection_id, "connection") + self.assertEqual(table.biglake_configuration.storage_uri, "uri") + self.assertEqual(table.biglake_configuration.file_format, "PARQUET") + self.assertEqual(table.biglake_configuration.table_format, "ICEBERG") + else: + self.assertIsNone(table.biglake_configuration) + def test_ctor(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -893,6 +908,60 @@ def test_table_constraints_property_getter(self): assert isinstance(table_constraints, TableConstraints) assert table_constraints.primary_key == PrimaryKey(columns=["id"]) + def test_biglake_configuration_not_set(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + assert table.biglake_configuration is None + + def test_biglake_configuration_set(self): + from google.cloud.bigquery.table import BigLakeConfiguration + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["biglakeConfiguration"] = { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + } + + config = table.biglake_configuration + + assert isinstance(config, BigLakeConfiguration) + assert config.connection_id == "connection" + assert config.storage_uri == "uri" + assert config.file_format == "PARQUET" + assert config.table_format == "ICEBERG" + + def test_biglake_configuration_property_setter(self): + from google.cloud.bigquery.table import BigLakeConfiguration + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + config = BigLakeConfiguration( + connection_id="connection", + storage_uri="uri", + file_format="PARQUET", + table_format="ICEBERG", + ) + table.biglake_configuration = config + + assert table._properties["biglakeConfiguration"] == { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + } + + table.biglake_configuration = None + assert table.biglake_configuration is None + def test_table_constraints_property_setter(self): from google.cloud.bigquery.table import ( ColumnReference, @@ -2166,6 +2235,97 @@ def test_ctor_full_resource(self): assert instance.snapshot_time == expected_time +class TestBigLakeConfiguration(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import BigLakeConfiguration + + return BigLakeConfiguration + + @classmethod + def _make_one(cls, *args, **kwargs): + klass = cls._get_target_class() + return klass(*args, **kwargs) + + def test_ctor_empty_resource(self): + instance = self._make_one() + 
self.assertIsNone(instance.connection_id) + self.assertIsNone(instance.storage_uri) + self.assertIsNone(instance.file_format) + self.assertIsNone(instance.table_format) + + def test_ctor_kwargs(self): + instance = self._make_one( + connection_id="conn", + storage_uri="uri", + file_format="FILE", + table_format="TABLE", + ) + self.assertEqual(instance.connection_id, "conn") + self.assertEqual(instance.storage_uri, "uri") + self.assertEqual(instance.file_format, "FILE") + self.assertEqual(instance.table_format, "TABLE") + + def test_ctor_full_resource(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + instance = self._make_one(_properties=resource) + self.assertEqual(instance.connection_id, "conn") + self.assertEqual(instance.storage_uri, "uri") + self.assertEqual(instance.file_format, "FILE") + self.assertEqual(instance.table_format, "TABLE") + + def test_to_api_repr(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + instance = self._make_one(_properties=resource) + self.assertEqual(instance.to_api_repr(), resource) + + def test_from_api_repr_partial(self): + klass = self._get_target_class() + api_repr = {"fileFormat": "FILE"} + instance = klass.from_api_repr(api_repr) + + self.assertIsNone(instance.connection_id) + self.assertIsNone(instance.storage_uri) + self.assertEqual(instance.file_format, "FILE") + self.assertIsNone(instance.table_format) + + def test_comparisons(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + + first = self._make_one(_properties=resource) + second = self._make_one(_properties=copy.deepcopy(resource)) + # Exercise comparator overloads. + # first and second should be equivalent. + self.assertNotEqual(first, resource) + self.assertEqual(first, second) + self.assertEqual(hash(first), hash(second)) + + # Update second to ensure that first and second are no longer equivalent. + second.connection_id = "foo" + self.assertNotEqual(first, second) + self.assertNotEqual(hash(first), hash(second)) + + # Update first with the same change, restoring equivalence. + first.connection_id = "foo" + self.assertEqual(first, second) + self.assertEqual(hash(first), hash(second)) + + class TestCloneDefinition: @staticmethod def _get_target_class(): From 7301667272dfbdd04b1a831418a9ad2d037171fb Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 29 Apr 2025 09:16:36 -0400 Subject: [PATCH 13/17] feat: Update the AccessEntry class with a new condition attribute and unit tests (#2163) * feat: adds condition class and assoc. 
unit tests * Updates AccessEntry with condition setter/getter * Adds condition attr to AccessEntry and unit tests * adds tests for Condition dunder methods to ensure coverage * moves the entity_type logic out of _from_api_repr to entity_type setter * Updates logic in entity_type getter * updates several AccessEntry related tests * Updates AccessEntry condition setter test to use a dict * updates entity_id handling * Updates _entity_type access * tweaks type hinting * Update tests/unit/test_dataset.py * Update tests/unit/test_dataset.py * Updates DatasetReference in test and __eq__ check * remove debug print statement --- google/cloud/bigquery/dataset.py | 126 ++++++++++-- tests/unit/test_dataset.py | 336 +++++++++++++++++++++++++++++-- 2 files changed, 432 insertions(+), 30 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index cc14598fe..670fe127c 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -298,12 +298,15 @@ def __init__( role: Optional[str] = None, entity_type: Optional[str] = None, entity_id: Optional[Union[Dict[str, Any], str]] = None, + **kwargs, ): - self._properties = {} + self._properties: Dict[str, Any] = {} if entity_type is not None: self._properties[entity_type] = entity_id self._properties["role"] = role - self._entity_type = entity_type + self._entity_type: Optional[str] = entity_type + for prop, val in kwargs.items(): + setattr(self, prop, val) @property def role(self) -> Optional[str]: @@ -330,6 +333,9 @@ def dataset(self, value): if isinstance(value, str): value = DatasetReference.from_string(value).to_api_repr() + if isinstance(value, DatasetReference): + value = value.to_api_repr() + if isinstance(value, (Dataset, DatasetListItem)): value = value.reference.to_api_repr() @@ -437,15 +443,65 @@ def special_group(self) -> Optional[str]: def special_group(self, value): self._properties["specialGroup"] = value + @property + def condition(self) -> Optional["Condition"]: + """Optional[Condition]: The IAM condition associated with this entry.""" + value = typing.cast(Dict[str, Any], self._properties.get("condition")) + return Condition.from_api_repr(value) if value else None + + @condition.setter + def condition(self, value: Union["Condition", dict, None]): + """Set the IAM condition for this entry.""" + if value is None: + self._properties["condition"] = None + elif isinstance(value, Condition): + self._properties["condition"] = value.to_api_repr() + elif isinstance(value, dict): + self._properties["condition"] = value + else: + raise TypeError("condition must be a Condition object, dict, or None") + @property def entity_type(self) -> Optional[str]: """The entity_type of the entry.""" + + # The api_repr for an AccessEntry object is expected to be a dict with + # only a few keys. Two keys that may be present are role and condition. + # Any additional key is going to have one of ~eight different names: + # userByEmail, groupByEmail, domain, dataset, specialGroup, view, + # routine, iamMember + + # if self._entity_type is None, see if it needs setting + # i.e. is there a key: value pair that should be associated with + # entity_type and entity_id? + if self._entity_type is None: + resource = self._properties.copy() + # we are emptying the dict to get to the last `key: value` pair + # so we don't keep these first entries + _ = resource.pop("role", None) + _ = resource.pop("condition", None) + + try: + # we only need entity_type, because entity_id gets set elsewhere.
+                entity_type, _ = resource.popitem()
+            except KeyError:
+                entity_type = None
+
+            self._entity_type = entity_type
+
         return self._entity_type

     @property
     def entity_id(self) -> Optional[Union[Dict[str, Any], str]]:
         """The entity_id of the entry."""
-        return self._properties.get(self._entity_type) if self._entity_type else None
+        if self.entity_type:
+            entity_type = self.entity_type
+        else:
+            return None
+        return typing.cast(
+            Optional[Union[Dict[str, Any], str]],
+            self._properties.get(entity_type, None),
+        )

     def __eq__(self, other):
         if not isinstance(other, AccessEntry):
@@ -464,7 +520,16 @@ def _key(self):
         Returns:
             Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`.
         """
+        properties = self._properties.copy()
+
+        # Dicts are not hashable.
+        # Convert condition to a hashable datatype(s)
+        condition = properties.get("condition")
+        if isinstance(condition, dict):
+            condition_key = tuple(sorted(condition.items()))
+            properties["condition"] = condition_key
+
+        prop_tup = tuple(sorted(properties.items()))
         return (self.role, self._entity_type, self.entity_id, prop_tup)

@@ -491,19 +556,11 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry":

         Returns:
             google.cloud.bigquery.dataset.AccessEntry:
                 Access entry parsed from ``resource``.
-
-        Raises:
-            ValueError:
-                If the resource has more keys than ``role`` and one additional
-                key.
         """
-        entry = resource.copy()
-        role = entry.pop("role", None)
-        entity_type, entity_id = entry.popitem()
-        if len(entry) != 0:
-            raise ValueError("Entry has unexpected keys remaining.", entry)
-        return cls(role, entity_type, entity_id)
+        access_entry = cls()
+        access_entry._properties = resource.copy()
+        return access_entry


 class Dataset(object):
@@ -1160,6 +1217,43 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition":
         return cls(
             expression=resource["expression"],
-            title=resource.get("title"),
-            description=resource.get("description"),
+            title=resource.get("title", None),
+            description=resource.get("description", None),
         )
+
+    def __eq__(self, other: object) -> bool:
+        """Check for equality based on expression, title, and description."""
+        if not isinstance(other, Condition):
+            return NotImplemented
+        return self._key() == other._key()
+
+    def _key(self):
+        """A tuple key that uniquely describes this condition.
+        Used to compute this instance's hashcode and evaluate equality.
+        Returns:
+            Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.Condition`.
+        """
+
+        properties = self._properties.copy()
+
+        # Dicts are not hashable.
+ # Convert object to a hashable datatype(s) + prop_tup = tuple(sorted(properties.items())) + return prop_tup + + def __ne__(self, other: object) -> bool: + """Check for inequality.""" + return not self == other + + def __hash__(self) -> int: + """Generate a hash based on expression, title, and description.""" + return hash(self._key()) + + def __repr__(self) -> str: + """Return a string representation of the Condition object.""" + parts = [f"expression={self.expression!r}"] + if self.title is not None: + parts.append(f"title={self.title!r}") + if self.description is not None: + parts.append(f"description={self.description!r}") + return f"Condition({', '.join(parts)})" diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 036e22458..51f1809bf 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -167,7 +167,10 @@ def test_from_api_repr_wo_role(self): entity_type="view", entity_id=resource["view"], ) - self.assertEqual(entry, exp_entry) + + assert entry.entity_type == exp_entry.entity_type + assert entry.entity_id == exp_entry.entity_id + assert entry.role is None def test_to_api_repr_w_extra_properties(self): resource = { @@ -179,15 +182,6 @@ def test_to_api_repr_w_extra_properties(self): exp_resource = entry.to_api_repr() self.assertEqual(resource, exp_resource) - def test_from_api_repr_entries_w_extra_keys(self): - resource = { - "role": "READER", - "specialGroup": "projectReaders", - "userByEmail": "salmon@example.com", - } - with self.assertRaises(ValueError): - self._get_target_class().from_api_repr(resource) - def test_view_getter_setter(self): view = { "projectId": "my_project", @@ -307,7 +301,10 @@ def test_dataset_getter_setter_dataset_ref(self): entry.dataset = dataset_ref resource = entry.to_api_repr() exp_resource = { - "dataset": {"dataset": dataset_ref, "targetTypes": None}, + "dataset": { + "dataset": {"datasetId": "my_dataset", "projectId": "my-project"}, + "targetTypes": None, + }, "role": None, } self.assertEqual(resource, exp_resource) @@ -494,6 +491,262 @@ def test_dataset_target_types_getter_setter_w_dataset(self): self.assertEqual(entry.dataset_target_types, target_types) +# --- Tests for AccessEntry when using Condition --- + +EXPRESSION = "request.time < timestamp('2026-01-01T00:00:00Z')" +TITLE = "Expires end 2025" +DESCRIPTION = "Access expires at the start of 2026." 
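# A minimal sketch of the new condition attribute that the fixtures and
# tests below exercise, assuming the AccessEntry and Condition classes from
# the dataset.py diff above; the entity values here are hypothetical:
cond = Condition(expression=EXPRESSION, title=TITLE, description=DESCRIPTION)
entry = AccessEntry("READER", "userByEmail", "reader@example.com", condition=cond)
assert entry.condition.to_api_repr() == cond.to_api_repr()
assert entry.to_api_repr() == {
    "role": "READER",
    "userByEmail": "reader@example.com",
    "condition": cond.to_api_repr(),
}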
+ + +@pytest.fixture +def condition_1(): + """Provides a sample Condition object.""" + return Condition( + expression=EXPRESSION, + title=TITLE, + description=DESCRIPTION, + ) + + +@pytest.fixture +def condition_1_api_repr(): + """Provides the API representation for condition_1.""" + # Use the actual to_api_repr method + return Condition( + expression=EXPRESSION, + title=TITLE, + description=DESCRIPTION, + ).to_api_repr() + + +@pytest.fixture +def condition_2(): + """Provides a second, different Condition object.""" + return Condition( + expression="resource.name.startsWith('projects/_/buckets/restricted/')", + title="Restricted Buckets", + ) + + +@pytest.fixture +def condition_2_api_repr(): + """Provides the API representation for condition2.""" + # Use the actual to_api_repr method + return Condition( + expression="resource.name.startsWith('projects/_/buckets/restricted/')", + title="Restricted Buckets", + ).to_api_repr() + + +class TestAccessEntryAndCondition: + @staticmethod + def _get_target_class(): + return AccessEntry + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + # Test __init__ without condition + def test_init_without_condition(self): + entry = AccessEntry("READER", "userByEmail", "test@example.com") + assert entry.role == "READER" + assert entry.entity_type == "userByEmail" + assert entry.entity_id == "test@example.com" + assert entry.condition is None + # Accessing _properties is for internal verification in tests + assert "condition" not in entry._properties + + # Test __init__ with condition object + def test_init_with_condition_object(self, condition_1, condition_1_api_repr): + entry = AccessEntry( + "READER", "userByEmail", "test@example.com", condition=condition_1 + ) + assert entry.condition == condition_1 + assert entry._properties.get("condition") == condition_1_api_repr + + # Test __init__ with condition=None + def test_init_with_condition_none(self): + entry = AccessEntry("READER", "userByEmail", "test@example.com", condition=None) + assert entry.condition is None + + # Test condition getter/setter + def test_condition_getter_setter( + self, condition_1, condition_1_api_repr, condition_2, condition_2_api_repr + ): + entry = AccessEntry("WRITER", "group", "admins@example.com") + assert entry.condition is None + + # Set condition 1 + entry.condition = condition_1 + assert entry.condition.to_api_repr() == condition_1_api_repr + assert entry._properties.get("condition") == condition_1_api_repr + + # Set condition 2 + entry.condition = condition_2 + assert entry.condition.to_api_repr() == condition_2_api_repr + assert entry._properties.get("condition") != condition_1_api_repr + assert entry._properties.get("condition") == condition_2.to_api_repr() + + # Set back to None + entry.condition = None + assert entry.condition is None + + # Set condition using a dict + entry.condition = condition_1_api_repr + assert entry._properties.get("condition") == condition_1_api_repr + + # Test setter validation + def test_condition_setter_invalid_type(self): + entry = AccessEntry("READER", "domain", "example.com") + with pytest.raises( + TypeError, match="condition must be a Condition object, dict, or None" + ): + entry.condition = 123 # type: ignore + + # Test equality/hash without condition + def test_equality_and_hash_without_condition(self): + entry1 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry2 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry3 = AccessEntry("WRITER", "specialGroup", "projectOwners") + assert 
entry1 == entry2 + assert entry1 != entry3 + assert hash(entry1) == hash(entry2) + assert hash(entry1) != hash(entry3) # Usually true + + def test_equality_and_hash_with_condition(self, condition_1, condition_2): + cond1a = Condition( + condition_1.expression, condition_1.title, condition_1.description + ) + cond1b = Condition( + condition_1.expression, condition_1.title, condition_1.description + ) # Same values, different object + + entry1a = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=cond1a + ) + entry1b = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=cond1b + ) # Different Condition instance + entry2 = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=condition_2 + ) + entry3 = AccessEntry("READER", "userByEmail", "a@example.com") # No condition + entry4 = AccessEntry( + "WRITER", "userByEmail", "a@example.com", condition=cond1a + ) # Different role + + assert entry1a == entry1b + assert entry1a != entry2 + assert entry1a != entry3 + assert entry1a != entry4 + assert entry2 != entry3 + + assert hash(entry1a) == hash(entry1b) + assert hash(entry1a) != hash(entry2) # Usually true + assert hash(entry1a) != hash(entry3) # Usually true + assert hash(entry1a) != hash(entry4) # Usually true + + # Test to_api_repr with condition + def test_to_api_repr_with_condition(self, condition_1, condition_1_api_repr): + entry = AccessEntry( + "WRITER", "groupByEmail", "editors@example.com", condition=condition_1 + ) + expected_repr = { + "role": "WRITER", + "groupByEmail": "editors@example.com", + "condition": condition_1_api_repr, + } + assert entry.to_api_repr() == expected_repr + + def test_view_property_with_condition(self, condition_1): + """Test setting/getting view property when condition is present.""" + entry = AccessEntry(role=None, entity_type="view", condition=condition_1) + view_ref = TableReference(DatasetReference("proj", "dset"), "view_tbl") + entry.view = view_ref # Use the setter + assert entry.view == view_ref + assert entry.condition == condition_1 # Condition should persist + assert entry.role is None + assert entry.entity_type == "view" + + # Check internal representation + assert "view" in entry._properties + assert "condition" in entry._properties + + def test_user_by_email_property_with_condition(self, condition_1): + """Test setting/getting user_by_email property when condition is present.""" + entry = AccessEntry( + role="READER", entity_type="userByEmail", condition=condition_1 + ) + email = "test@example.com" + entry.user_by_email = email # Use the setter + assert entry.user_by_email == email + assert entry.condition == condition_1 # Condition should persist + assert entry.role == "READER" + assert entry.entity_type == "userByEmail" + + # Check internal representation + assert "userByEmail" in entry._properties + assert "condition" in entry._properties + + # Test from_api_repr without condition + def test_from_api_repr_without_condition(self): + api_repr = {"role": "OWNER", "userByEmail": "owner@example.com"} + entry = AccessEntry.from_api_repr(api_repr) + assert entry.role == "OWNER" + assert entry.entity_type == "userByEmail" + assert entry.entity_id == "owner@example.com" + assert entry.condition is None + + # Test from_api_repr with condition + def test_from_api_repr_with_condition(self, condition_1, condition_1_api_repr): + api_repr = { + "role": "READER", + "view": {"projectId": "p", "datasetId": "d", "tableId": "v"}, + "condition": condition_1_api_repr, + } + entry = AccessEntry.from_api_repr(api_repr) + 
assert entry.role == "READER" + assert entry.entity_type == "view" + # The entity_id for view/routine/dataset is the dict itself + assert entry.entity_id == {"projectId": "p", "datasetId": "d", "tableId": "v"} + assert entry.condition == condition_1 + + # Test from_api_repr edge case + def test_from_api_repr_no_entity(self, condition_1, condition_1_api_repr): + api_repr = {"role": "READER", "condition": condition_1_api_repr} + entry = AccessEntry.from_api_repr(api_repr) + assert entry.role == "READER" + assert entry.entity_type is None + assert entry.entity_id is None + assert entry.condition == condition_1 + + def test_dataset_property_with_condition(self, condition_1): + project = "my-project" + dataset_id = "my_dataset" + dataset_ref = DatasetReference(project, dataset_id) + entry = self._make_one(None) + entry.dataset = dataset_ref + entry.condition = condition_1 + + resource = entry.to_api_repr() + exp_resource = { + "role": None, + "dataset": { + "dataset": {"datasetId": "my_dataset", "projectId": "my-project"}, + "targetTypes": None, + }, + "condition": { + "expression": "request.time < timestamp('2026-01-01T00:00:00Z')", + "title": "Expires end 2025", + "description": "Access expires at the start of 2026.", + }, + } + assert resource == exp_resource + # Check internal representation + assert "dataset" in entry._properties + assert "condition" in entry._properties + + class TestDatasetReference(unittest.TestCase): @staticmethod def _get_target_class(): @@ -821,7 +1074,15 @@ def test_ctor_explicit(self): self.assertEqual( dataset.path, "/projects/%s/datasets/%s" % (OTHER_PROJECT, self.DS_ID) ) - self.assertEqual(dataset.access_entries, entries) + # creating a list of entries relies on AccessEntry.from_api_repr + # which does not create an object in exactly the same way as calling the + # class directly. We rely on calls to .entity_type and .entity_id to + # finalize the settings on each class. + entry_pairs = zip(dataset.access_entries, entries) + for pair in entry_pairs: + assert pair[0].role == pair[1].role + assert pair[0].entity_type == pair[1].entity_type + assert pair[0].entity_id == pair[1].entity_id self.assertIsNone(dataset.created) self.assertIsNone(dataset.full_dataset_id) @@ -854,8 +1115,18 @@ def test_access_entries_setter(self): dataset = self._make_one(self.DS_REF) phred = AccessEntry("OWNER", "userByEmail", "phred@example.com") bharney = AccessEntry("OWNER", "userByEmail", "bharney@example.com") - dataset.access_entries = [phred, bharney] - self.assertEqual(dataset.access_entries, [phred, bharney]) + entries = [phred, bharney] + dataset.access_entries = entries + + # creating a list of entries relies on AccessEntry.from_api_repr + # which does not create an object in exactly the same way as calling the + # class directly. We rely on calls to .entity_type and .entity_id to + # finalize the settings on each class. + entry_pairs = zip(dataset.access_entries, entries) + for pair in entry_pairs: + assert pair[0].role == pair[1].role + assert pair[0].entity_type == pair[1].entity_type + assert pair[0].entity_id == pair[1].entity_id def test_default_partition_expiration_ms(self): dataset = self._make_one("proj.dset") @@ -1383,3 +1654,40 @@ def test_validation_expression_required_from_api(self): ValueError, match="API representation missing required 'expression' field." 
         ):
             Condition.from_api_repr(api_repr)
+
+    def test___eq___equality(self, condition_1):
+        result = condition_1
+        expected = condition_1
+        assert result == expected
+
+    def test___eq___equality_not_condition(self, condition_1):
+        result = condition_1
+        other = "not a condition"
+        expected = result.__eq__(other)
+        assert expected is NotImplemented
+
+    def test__ne__not_equality(self, condition_1, condition_2):
+        result = condition_1
+        expected = condition_2
+        assert result != expected
+
+    def test__hash__function(self, condition_2):
+        cond1 = Condition(
+            expression=self.EXPRESSION, title=self.TITLE, description=self.DESCRIPTION
+        )
+        cond2 = cond1
+        cond_not_equal = condition_2
+        assert cond1 == cond2
+        assert cond1 is cond2
+        assert hash(cond1) == hash(cond2)
+        assert hash(cond1) is not None
+        assert cond_not_equal != cond1
+        assert hash(cond_not_equal) != hash(cond1)
+
+    def test__hash__with_minimal_inputs(self):
+        cond1 = Condition(
+            expression="example",
+            title=None,
+            description=None,
+        )
+        assert hash(cond1) is not None

From b7656b97c1bd6c204d0508b1851d114719686655 Mon Sep 17 00:00:00 2001
From: Chalmer Lowe
Date: Thu, 1 May 2025 17:20:21 -0400
Subject: [PATCH 14/17] feat: add dataset access policy version attribute
 (#2169)

* feat: adds condition class and assoc. unit tests
* Updates two test cases for empty string
* Updates tests for clarity
* Updates access_policy_version setter and unittest
---
 google/cloud/bigquery/dataset.py | 15 +++++++++++++--
 tests/unit/test_dataset.py       | 30 ++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py
index 670fe127c..d225b7106 100644
--- a/google/cloud/bigquery/dataset.py
+++ b/google/cloud/bigquery/dataset.py
@@ -589,6 +589,7 @@ class Dataset(object):
         "default_rounding_mode": "defaultRoundingMode",
         "resource_tags": "resourceTags",
         "external_catalog_dataset_options": "externalCatalogDatasetOptions",
+        "access_policy_version": "accessPolicyVersion",
     }

     def __init__(self, dataset_ref) -> None:
@@ -979,6 +980,16 @@ def external_catalog_dataset_options(self, value):
             self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"]
         ] = (value.to_api_repr() if value is not None else None)

+    @property
+    def access_policy_version(self):
+        """Optional[int]: The access policy version of this dataset;
+        accepted values are validated by the BigQuery backend."""
+        return self._properties.get("accessPolicyVersion")
+
+    @access_policy_version.setter
+    def access_policy_version(self, value):
+        if not isinstance(value, int) and value is not None:
+            raise ValueError("Pass an integer, or None")
+        self._properties["accessPolicyVersion"] = value
+
     @classmethod
     def from_string(cls, full_dataset_id: str) -> "Dataset":
         """Construct a dataset from fully-qualified dataset ID.
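# A short sketch of the new access_policy_version property, assuming a
# Dataset built from a hypothetical "project.dataset_id" string; the allowed
# values (currently 1, 2, and 3, or None) are validated by the BigQuery
# backend rather than by the client:
dataset = Dataset("my-project.my_dataset")
dataset.access_policy_version = 3
assert dataset.access_policy_version == 3
dataset.access_policy_version = None  # None is accepted and stored as-is
# dataset.access_policy_version = "3" would raise ValueError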
@@ -1217,8 +1228,8 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": return cls( expression=resource["expression"], - title=resource.get("title", None), - description=resource.get("description", None), + title=resource.get("title"), + description=resource.get("description"), ) def __eq__(self, other: object) -> bool: diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 51f1809bf..941430827 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1049,6 +1049,7 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) self.assertEqual(dataset.is_case_insensitive, False) + self.assertIsNone(dataset.access_policy_version) def test_ctor_string(self): dataset = self._make_one("some-project.some_dset") @@ -1423,6 +1424,35 @@ def test_external_catalog_dataset_options_to_api_repr(self): expected = api_repr["externalCatalogDatasetOptions"] assert result == expected + def test_access_policy_version_valid_input(self): + dataset = self._make_one(self.DS_REF) + # Valid inputs for access_policy_version are currently + # ints 1, 2, 3, and None + # We rely upon the BQ backend to validate acceptable integer + # values, rather than perform that validation in the client. + for expected in [1, 2, 3, None]: + # set property using setter and integer + dataset.access_policy_version = expected + + # check getter and _properties dict + assert ( + dataset.access_policy_version == expected + ), f"Expected {expected} but got {dataset.access_policy_version}" + assert dataset._properties["accessPolicyVersion"] == expected + + def test_access_policy_version_invalid_input(self): + dataset = self._make_one(self.DS_REF) + # Valid inputs for access_policy_version are currently + # ints 1, 2, 3, and None + + with pytest.raises(ValueError): + invalid_value = "a string" + dataset.access_policy_version = invalid_value + + with pytest.raises(ValueError): + invalid_value = 42.0 + dataset.access_policy_version = invalid_value + class TestDatasetListItem(unittest.TestCase): @staticmethod From 46927479085f13fd326e3f2388f60dfdd37f7f69 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 1 May 2025 14:52:26 -0700 Subject: [PATCH 15/17] feat: add WRITE_TRUNCATE_DATA enum (#2166) This PR documents the new WRITE_TRUNCATE_DATA write disposition by adding the enum value. 
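As a usage sketch (the client, bucket, and table names below are hypothetical), the new value is passed wherever a write disposition is accepted, for example on a load job:

    from google.cloud import bigquery

    client = bigquery.Client()
    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE_DATA,
    )
    # Truncates existing rows on write while preserving the destination
    # table's schema and constraints (unlike WRITE_TRUNCATE).
    client.load_table_from_uri(
        "gs://my-bucket/data.csv",
        "my-project.my_dataset.my_table",
        job_config=job_config,
    )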
internal issue: b/406848221 --- google/cloud/bigquery/enums.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index b32fc8200..203ea3c7b 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -338,6 +338,10 @@ class WriteDisposition(object): WRITE_TRUNCATE = "WRITE_TRUNCATE" """If the table already exists, BigQuery overwrites the table data.""" + WRITE_TRUNCATE_DATA = "WRITE_TRUNCATE_DATA" + """For existing tables, truncate data but preserve existing schema + and constraints.""" + WRITE_EMPTY = "WRITE_EMPTY" """If the table already exists and contains data, a 'duplicate' error is returned in the job result.""" From 5c8e9179923d914745eaa98fc52a9d8577fe2484 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 12 May 2025 18:34:55 +0200 Subject: [PATCH 16/17] chore(deps): update all dependencies (#2158) * chore(deps): update all dependencies * Update samples/geography/requirements.txt --------- Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 25 ++++++++++++------------ samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 6 +++--- samples/snippets/requirements-test.txt | 2 +- 8 files changed, 22 insertions(+), 21 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 183230cf4..6abea3b4d 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index fa349e0d3..b98f4ace9 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ google-cloud-bigquery==3.31.0 -google-auth-oauthlib==1.2.1 +google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 37bcdf687..2b5a71c8c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,8 +1,9 @@ attrs==25.3.0 -certifi==2025.1.31 +certifi==2025.4.26 cffi==1.17.1 -charset-normalizer==3.4.1 -click==8.1.8 +charset-normalizer==3.4.2 +click===8.1.8; python_version == '3.9' +click==8.2.0; python_version >= '3.10' click-plugins==1.1.1 cligj==0.7.2 db-dtypes==1.4.2 @@ -10,21 +11,21 @@ Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 -google-auth==2.38.0 +google-auth==2.40.1 google-cloud-bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.69.2 +googleapis-common-protos==1.70.0 grpcio==1.71.0 idna==3.10 munch==4.0.0 -mypy-extensions==1.0.0 -packaging==24.2 +mypy-extensions==1.1.0 +packaging==25.0 pandas==2.2.3 proto-plus==1.26.1 -pyarrow==19.0.1 +pyarrow==20.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.22 @@ -33,10 +34,10 @@ python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 -rsa==4.9 +rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.0; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.1 +typing-extensions==4.13.2 typing-inspect==0.9.0 -urllib3==2.3.0 +urllib3==2.4.0 
diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 183230cf4..6abea3b4d 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 3ab215951..2c9e158c0 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.9.0 db-dtypes==1.4.2 google.cloud.bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 183230cf4..6abea3b4d 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index ca5505a2e..d1e2f39fb 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ bigquery-magics==0.9.0 db-dtypes==1.4.2 google-cloud-bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' -ipython==9.0.2; python_version >= '3.10' +ipython==9.2.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.1; python_version >= '3.10' +matplotlib==3.10.3; python_version >= '3.10' pandas==2.2.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 0cf0bb6b4..6760e1228 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 From 185116ead5f68b959feb339566e964572fe12692 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 13:04:01 -0400 Subject: [PATCH 17/17] chore(main): release 3.32.0 (#2152) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b115464c..ff1bd7acc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.32.0](https://github.com/googleapis/python-bigquery/compare/v3.31.0...v3.32.0) (2025-05-12) + + +### Features + +* Add dataset access policy version attribute ([#2169](https://github.com/googleapis/python-bigquery/issues/2169)) ([b7656b9](https://github.com/googleapis/python-bigquery/commit/b7656b97c1bd6c204d0508b1851d114719686655)) +* Add preview support for incremental results ([#2145](https://github.com/googleapis/python-bigquery/issues/2145)) ([22b80bb](https://github.com/googleapis/python-bigquery/commit/22b80bba9d0bed319fd3102e567906c9b458dd02)) +* Add WRITE_TRUNCATE_DATA enum ([#2166](https://github.com/googleapis/python-bigquery/issues/2166)) ([4692747](https://github.com/googleapis/python-bigquery/commit/46927479085f13fd326e3f2388f60dfdd37f7f69)) +* Adds 
condition class and assoc. unit tests ([#2159](https://github.com/googleapis/python-bigquery/issues/2159)) ([a69d6b7](https://github.com/googleapis/python-bigquery/commit/a69d6b796d2edb6ba453980c9553bc9b206c5a6e)) +* Support BigLakeConfiguration (managed Iceberg tables) ([#2162](https://github.com/googleapis/python-bigquery/issues/2162)) ([a1c8e9a](https://github.com/googleapis/python-bigquery/commit/a1c8e9aaf60986924868d54a0ab0334e77002a39)) +* Update the AccessEntry class with a new condition attribute and unit tests ([#2163](https://github.com/googleapis/python-bigquery/issues/2163)) ([7301667](https://github.com/googleapis/python-bigquery/commit/7301667272dfbdd04b1a831418a9ad2d037171fb)) + + +### Bug Fixes + +* `query()` now warns when `job_id` is set and the default `job_retry` is ignored ([#2167](https://github.com/googleapis/python-bigquery/issues/2167)) ([ca1798a](https://github.com/googleapis/python-bigquery/commit/ca1798aaee2d5905fe688d3097f8ee5c989da333)) +* Empty record dtypes ([#2147](https://github.com/googleapis/python-bigquery/issues/2147)) ([77d7173](https://github.com/googleapis/python-bigquery/commit/77d71736fcc006d3ab8f8ba17955ad5f06e21876)) +* Table iterator should not use bqstorage when page_size is not None ([#2154](https://github.com/googleapis/python-bigquery/issues/2154)) ([e89a707](https://github.com/googleapis/python-bigquery/commit/e89a707b162182ededbf94cc9a0f7594bc2be475)) + ## [3.31.0](https://github.com/googleapis/python-bigquery/compare/v3.30.0...v3.31.0) (2025-03-20) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index c0f7a96d6..fe13d2477 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.31.0" +__version__ = "3.32.0"