From 68d67559d87c77fb77baac6c374dcaf73e7e5687 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 5 Sep 2024 18:12:00 +0000 Subject: [PATCH 1/2] fix: Fix read_gbq with ORDER BY query and index_col set --- bigframes/core/blocks.py | 2 +- bigframes/session/loader.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index a309671842..1694c944a0 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -2540,7 +2540,7 @@ def _get_rows_as_json_values(self) -> Block: # The only ways this code is used is through df.apply(axis=1) cope path # TODO: Stop using internal API destination, query_job = self.session._loader._query_to_destination( - json_sql, index_cols=[ordering_column_name], api_name="apply" + json_sql, cluster_candidates=[ordering_column_name], api_name="apply" ) if not destination: raise ValueError(f"Query job {query_job} did not produce result table") diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index edfd57b965..2cdd8f977f 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -508,9 +508,10 @@ def read_gbq_query( time_travel_timestamp=None, ) + # Pass no cluster candidates: the user's query might not be clusterable (e.g. because of an ORDER BY clause) destination, query_job = self._query_to_destination( query, - index_cols, + cluster_candidates=[], api_name=api_name, configuration=configuration, ) @@ -544,7 +545,7 @@ def read_gbq_query( def _query_to_destination( self, query: str, - index_cols: List[str], + cluster_candidates: List[str], api_name: str, configuration: dict = {"query": {"useQueryCache": True}}, do_clustering=True, @@ -567,7 +568,7 @@ def _query_to_destination( assert schema is not None if do_clustering: cluster_cols = bf_io_bigquery.select_cluster_cols( - schema, cluster_candidates=index_cols + schema, cluster_candidates=cluster_candidates ) else: cluster_cols = [] From ae8f54456ea23db18738619f2d35eec26f5a477c Mon 
Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 5 Sep 2024 20:19:04 +0000 Subject: [PATCH 2/2] add integration test --- tests/system/small/test_session.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 5b5db74ea6..b96befd255 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -130,9 +130,10 @@ def test_read_gbq_w_unknown_index_col( CONCAT(t.string_col, "_2") AS my_strings, t.int64_col > 0 AS my_bools, FROM `{scalars_table_id}` AS t + ORDER BY my_strings """, ["my_strings"], - id="string_index", + id="string_index_w_order_by", ), pytest.param( "SELECT GENERATE_UUID() AS uuid, 0 AS my_value FROM UNNEST(GENERATE_ARRAY(1, 20))",