Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit 2326ad6

Browse files
authored
fix: handle unsupported types and empty results in describe (#2506)
The .describe() method attempts to compute metrics such as mean, max, and unique on every column. For multimodal (MM) complex types (structs representing images/videos), BigQuery does not allow COUNT(DISTINCT column) or mathematical aggregates, and running them raises syntax/type validation errors. This change limits the aggregations on OBJ_REF_DTYPE and JSON columns to a basic .count() and skips the summary metrics that are unsupported or require hashable values. It also handles the case where describe() produces no result columns. Fixes #<452681068> 🦕
1 parent 91b6c24 commit 2326ad6
Copy full SHA for 2326ad6

2 files changed

+45 -3 — Lines changed: 45 additions & 3 deletions

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎bigframes/pandas/core/methods/describe.py‎

Copy file name to clipboardExpand all lines: bigframes/pandas/core/methods/describe.py
+6-3Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,10 @@ def describe(
5656
"max",
5757
]
5858
).intersection(describe_block.column_labels.get_level_values(-1))
59-
describe_block = describe_block.stack(override_labels=stack_cols)
60-
61-
return dataframe.DataFrame(describe_block).droplevel(level=0)
59+
if not stack_cols.empty:
60+
describe_block = describe_block.stack(override_labels=stack_cols)
61+
return dataframe.DataFrame(describe_block).droplevel(level=0)
62+
return dataframe.DataFrame(describe_block)
6263

6364

6465
def _describe(
@@ -120,5 +121,7 @@ def _get_aggs_for_dtype(dtype) -> list[aggregations.UnaryAggregateOp]:
120121
dtypes.TIME_DTYPE,
121122
]:
122123
return [aggregations.count_op, aggregations.nunique_op]
124+
elif dtypes.is_json_like(dtype) or dtype == dtypes.OBJ_REF_DTYPE:
125+
return [aggregations.count_op]
123126
else:
124127
return []
Collapse file

‎tests/system/small/pandas/test_describe.py‎

Copy file name to clipboardExpand all lines: tests/system/small/pandas/test_describe.py
+39Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import pandas.testing
1616
import pytest
1717

18+
import bigframes.pandas as bpd
19+
1820

1921
def test_df_describe_non_temporal(scalars_dfs):
2022
# TODO: supply a reason why this isn't compatible with pandas 1.x
@@ -352,3 +354,40 @@ def test_series_groupby_describe(scalars_dfs):
352354
check_dtype=False,
353355
check_index_type=False,
354356
)
357+
358+
359+
def test_describe_json_and_obj_ref_returns_count(session):
    """Check that describe(include="all") yields a count for JSON and object-ref columns.

    A one-row table is read with a JSON column and a URI string column; the
    URI column is converted to an object reference via ``.str.to_blob()`` and
    then dropped, so only the JSON and object-ref columns are described.
    Only the ``count`` row is asserted on (nunique is not requested for these
    types because it fails).
    """
    query = """
    SELECT
        PARSE_JSON('{"a": 1}') AS json_col,
        'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col
    """
    frame = session.read_gbq(query)

    # Derive the object-ref column from the URI, then discard the plain string
    # column so it does not contribute to the summary.
    frame["obj_ref_col"] = frame["uri_col"].str.to_blob()
    frame = frame.drop(columns=["uri_col"])

    summary = frame.describe(include="all").to_pandas()

    assert "count" in summary.index
    # The source query returns a single row, so each column's count is 1.
    for column_name in ("json_col", "obj_ref_col"):
        assert summary.loc["count", column_name] == 1.0
376+
377+
378+
def test_describe_with_unsupported_type_returns_empty_dataframe(session):
    """Check describe() on a frame whose only column comes from ST_GEOGPOINT.

    The default describe() call is expected to produce a result with no
    columns and exactly one index entry, without raising an error.
    """
    geo_frame = session.read_gbq("SELECT ST_GEOGPOINT(1.0, 2.0) AS geo_col")

    summary = geo_frame.describe().to_pandas()

    # DataFrame.shape is (number of index entries, number of columns).
    row_count, column_count = summary.shape
    assert column_count == 0
    assert row_count == 1
385+
386+
387+
def test_describe_empty_dataframe_returns_empty_dataframe(session):
    """Check describe() on a DataFrame constructed with no data.

    The result is expected to have no columns and exactly one index entry.
    The ``session`` fixture is requested but not referenced in the body
    (presumably it ensures a session is configured -- confirm).
    """
    summary = bpd.DataFrame().describe().to_pandas()

    # DataFrame.shape is (number of index entries, number of columns).
    row_count, column_count = summary.shape
    assert column_count == 0
    assert row_count == 1

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.