diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 43f605dc03..a180b99719 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -2573,7 +2573,7 @@ def _get_rows_as_json_values(self) -> Block: # The only ways this code is used is through df.apply(axis=1) cope path # TODO: Stop using internal API destination, query_job = self.session._loader._query_to_destination( - json_sql, index_cols=[ordering_column_name], api_name="apply" + json_sql, cluster_candidates=[ordering_column_name], api_name="apply" ) if not destination: raise ValueError(f"Query job {query_job} did not produce result table") diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index 43faae37c3..a7af7d0192 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -603,9 +603,10 @@ def read_gbq_query( time_travel_timestamp=None, ) + # No cluster candidates as user query might not be clusterable (e.g. because of ORDER BY clause) destination, query_job = self._query_to_destination( query, - index_cols, + cluster_candidates=[], api_name=api_name, configuration=configuration, ) @@ -642,7 +643,7 @@ def read_gbq_query( def _query_to_destination( self, query: str, - index_cols: List[str], + cluster_candidates: List[str], api_name: str, configuration: dict = {"query": {"useQueryCache": True}}, do_clustering=True, @@ -665,7 +666,7 @@ def _query_to_destination( assert schema is not None if do_clustering: cluster_cols = bf_io_bigquery.select_cluster_cols( - schema, cluster_candidates=index_cols + schema, cluster_candidates=cluster_candidates ) else: cluster_cols = [] diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index e95509e033..a1b08671ba 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -129,9 +129,10 @@ def test_read_gbq_w_unknown_index_col( CONCAT(t.string_col, "_2") AS my_strings, t.int64_col > 0 AS my_bools, FROM `{scalars_table_id}` AS t + ORDER BY 
my_strings """, ["my_strings"], - id="string_index", + id="string_index_w_order_by", ), pytest.param( "SELECT GENERATE_UUID() AS uuid, 0 AS my_value FROM UNNEST(GENERATE_ARRAY(1, 20))",