From b28520d01c83fde1928861a7250a3e88e04c84d2 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Wed, 9 Oct 2024 21:02:55 +0000 Subject: [PATCH 1/2] fix: corrected inability to access MATERIALIZED_VIEW with read_gbq --- bigframes/core/nodes.py | 6 +++--- bigframes/session/_io/bigquery/read_gbq_table.py | 12 ++++++++++++ bigframes/session/loader.py | 7 ++++++- tests/system/small/test_dataframe.py | 4 ++++ 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 3494bee9eb..1d01936509 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -547,7 +547,7 @@ class GbqTable: table_id: str = field() physical_schema: Tuple[bq.SchemaField, ...] = field() n_rows: int = field() - is_physical_table: bool = field() + is_physically_stored: bool = field() cluster_cols: typing.Optional[Tuple[str, ...]] @staticmethod @@ -563,7 +563,7 @@ def from_table(table: bq.Table, columns: Sequence[str] = ()) -> GbqTable: table_id=table.table_id, physical_schema=schema, n_rows=table.num_rows, - is_physical_table=(table.table_type == "TABLE"), + is_physically_stored=(table.table_type in ["TABLE", "MATERIALIZED_VIEW"]), cluster_cols=None if table.clustering_fields is None else tuple(table.clustering_fields), @@ -641,7 +641,7 @@ def variables_introduced(self) -> int: @property def row_count(self) -> typing.Optional[int]: - if self.source.sql_predicate is None and self.source.table.is_physical_table: + if self.source.sql_predicate is None and self.source.table.is_physically_stored: return self.source.table.n_rows return None diff --git a/bigframes/session/_io/bigquery/read_gbq_table.py b/bigframes/session/_io/bigquery/read_gbq_table.py index 7585dd3f45..01ff1a3f15 100644 --- a/bigframes/session/_io/bigquery/read_gbq_table.py +++ b/bigframes/session/_io/bigquery/read_gbq_table.py @@ -102,6 +102,7 @@ def validate_table( table_ref: bigquery.table.TableReference, columns: Optional[Sequence[str]], snapshot_time: datetime.datetime, + table_type: str, filter_str: Optional[str] = None, ) -> bool: """Validates that the table can be read, returns True iff snapshot is supported.""" @@ -124,6 +125,17 @@ def validate_table( if table_ref.dataset_id.startswith("_"): return False + # Materialized views,does not support snapshot + if table_type == "MATERIALIZED_VIEW": + warnings.warn( + "Materialized views do not support FOR SYSTEM_TIME AS OF queries. " + "Attempting query without time travel. Be aware that as materialized views " + "are updated periodically, modifications to the underlying data in the view may " + "result in errors or unexpected behavior.", + category=bigframes.exceptions.TimeTravelDisabledWarning, + ) + return False + # Second, try with snapshot to verify table supports this feature snapshot_sql = bigframes.session._io.bigquery.to_query( query_or_table=f"{table_ref.project}.{table_ref.dataset_id}.{table_ref.table_id}", diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index 22de367804..923605627d 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -339,7 +339,12 @@ def read_gbq_table( ) enable_snapshot = enable_snapshot and bf_read_gbq_table.validate_table( - self._bqclient, table_ref, all_columns, time_travel_timestamp, filter_str + self._bqclient, + table_ref, + all_columns, + time_travel_timestamp, + table.table_type, + filter_str, ) # ---------------------------- diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 6ee9fb8247..1fb12d3f82 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -1524,6 +1524,10 @@ def test_shape(scalars_dfs): @pytest.mark.parametrize( "reference_table, test_table", [ + ( + "bigframes-dev.bigframes_tests_sys.base_table", + "bigframes-dev.bigframes_tests_sys.base_table_mat_view", + ), ( "bigframes-dev.bigframes_tests_sys.base_table", "bigframes-dev.bigframes_tests_sys.base_table_view", From 38f7836d56502f3cb813c2440b7814f629f3904f Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Wed, 9 Oct 2024 21:15:19 +0000 Subject: [PATCH 2/2] update test --- tests/system/small/test_session.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 17e8b99704..4b48915d2d 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -390,9 +390,16 @@ def test_read_gbq_twice_with_same_timestamp(session, penguins_table_id): assert df3 is not None -def test_read_gbq_on_linked_dataset_warns(session): +@pytest.mark.parametrize( + "source_table", + [ + "bigframes-dev.thelook_ecommerce.orders", + "bigframes-dev.bigframes_tests_sys.base_table_mat_view", + ], +) +def test_read_gbq_on_linked_dataset_warns(session, source_table): with warnings.catch_warnings(record=True) as warned: - session.read_gbq("bigframes-dev.thelook_ecommerce.orders") + session.read_gbq(source_table) assert len(warned) == 1 assert warned[0].category == bigframes.exceptions.TimeTravelDisabledWarning