Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit cc2dbae

Browse files
authored
fix: do not warn with DefaultIndexWarning in partial ordering mode (#2230)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea.
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes internal issue b/356872356 🦕
1 parent c62e553 commit cc2dbae
Copy full SHA for cc2dbae

3 files changed

+50-1Lines changed: 50 additions & 1 deletion

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎bigframes/session/_io/bigquery/read_gbq_table.py‎

Copy file name to clipboardExpand all lines: bigframes/session/_io/bigquery/read_gbq_table.py
+6-1Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,7 @@ def get_index_cols(
402402
| bigframes.enums.DefaultIndexKind,
403403
*,
404404
rename_to_schema: Optional[Dict[str, str]] = None,
405+
default_index_type: bigframes.enums.DefaultIndexKind = bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64,
405406
) -> List[str]:
406407
"""
407408
If we can get a total ordering from the table, such as via primary key
@@ -471,7 +472,11 @@ def get_index_cols(
471472
# find index_cols to use. This is to avoid unexpected performance and
472473
# resource utilization because of the default sequential index. See
473474
# internal issue 335727141.
474-
if _is_table_clustered_or_partitioned(table) and not primary_keys:
475+
if (
476+
_is_table_clustered_or_partitioned(table)
477+
and not primary_keys
478+
and default_index_type == bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
479+
):
475480
msg = bfe.format_message(
476481
f"Table '{str(table.reference)}' is clustered and/or "
477482
"partitioned, but BigQuery DataFrames was not able to find a "
Collapse file

‎bigframes/session/loader.py‎

Copy file name to clipboardExpand all lines: bigframes/session/loader.py
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,7 @@ def read_gbq_table(
696696
table=table,
697697
index_col=index_col,
698698
rename_to_schema=rename_to_schema,
699+
default_index_type=self._default_index_type,
699700
)
700701
_check_index_col_param(
701702
index_cols,
Collapse file

‎tests/unit/session/test_read_gbq_table.py‎

Copy file name to clipboardExpand all lines: tests/unit/session/test_read_gbq_table.py
+43Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515
"""Unit tests for read_gbq_table helper functions."""
1616

1717
import unittest.mock as mock
18+
import warnings
1819

1920
import google.cloud.bigquery
2021
import pytest
2122

23+
import bigframes.enums
24+
import bigframes.exceptions
2225
import bigframes.session._io.bigquery.read_gbq_table as bf_read_gbq_table
2326
from bigframes.testing import mocks
2427

@@ -143,3 +146,43 @@ def test_check_if_index_columns_are_unique(index_cols, values_distinct, expected
143146
)
144147

145148
assert result == expected
149+
150+
151+
def test_get_index_cols_warns_if_clustered_but_sequential_index():
    """Check that DefaultIndexWarning depends on the default index kind.

    A clustered table read with ``index_col=()`` must emit
    ``DefaultIndexWarning`` when the default index kind is SEQUENTIAL_INT64
    and must stay silent when the default index kind is NULL.
    """
    index_kinds = bigframes.enums.DefaultIndexKind
    default_index_warning = bigframes.exceptions.DefaultIndexWarning

    # Describe a table clustered on col1/col2 via its REST representation.
    table_resource = {
        "tableReference": {
            "projectId": "my-project",
            "datasetId": "my_dataset",
            "tableId": "my_table",
        },
        "clustering": {
            "fields": ["col1", "col2"],
        },
    }
    clustered_table = google.cloud.bigquery.Table.from_api_repr(table_resource)
    clustered_table.schema = tuple(
        google.cloud.bigquery.SchemaField(column_name, "INT64")
        for column_name in ("col1", "col2", "col3", "col4")
    )

    # Sequential default index: the warning is expected.
    with pytest.warns(default_index_warning, match="is clustered"):
        bf_read_gbq_table.get_index_cols(
            clustered_table,
            index_col=(),
            default_index_type=index_kinds.SEQUENTIAL_INT64,
        )

    # NULL default index (such as in partial ordering mode): no warning may be
    # emitted. Escalating the warning to an error makes any emission fail the
    # test. See: internal issue b/356872356.
    with warnings.catch_warnings():
        warnings.simplefilter("error", category=default_index_warning)
        bf_read_gbq_table.get_index_cols(
            clustered_table,
            index_col=(),
            default_index_type=index_kinds.NULL,
        )

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.