Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit e8c4603

Browse files
authored
fix: support melting empty DataFrames without crashing (#2509)
Allows alignment melts over zero-row offset layouts. Fixes #<452681068> 🦕
1 parent b5a7652 commit e8c4603
Copy full SHA for e8c4603

3 files changed

+93 -20 Lines changed: 93 additions & 20 deletions

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎bigframes/core/blocks.py‎

Copy file name to clipboardExpand all lines: bigframes/core/blocks.py
+49 -20 Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,9 +1822,9 @@ def melt(
18221822
Arguments correspond to pandas.melt arguments.
18231823
"""
18241824
# TODO: Implement col_level and ignore_index
1825-
value_labels: pd.Index = pd.Index(
1826-
[self.col_id_to_label[col_id] for col_id in value_vars]
1827-
)
1825+
value_labels: pd.Index = self.column_labels[
1826+
[self.value_columns.index(col_id) for col_id in value_vars]
1827+
]
18281828
id_labels = [self.col_id_to_label[col_id] for col_id in id_vars]
18291829

18301830
unpivot_expr, (var_col_ids, unpivot_out, passthrough_cols) = unpivot(
@@ -3417,6 +3417,7 @@ def unpivot(
34173417
joined_array, (labels_mapping, column_mapping) = labels_array.relational_join(
34183418
array_value, type="cross"
34193419
)
3420+
34203421
new_passthrough_cols = [column_mapping[col] for col in passthrough_columns]
34213422
# Last column is offsets
34223423
index_col_ids = [labels_mapping[col] for col in labels_array.column_ids[:-1]]
@@ -3426,20 +3427,24 @@ def unpivot(
34263427
unpivot_exprs: List[ex.Expression] = []
34273428
# Supports producing multiple stacked output columns for stacking only part of a hierarchical index
34283429
for input_ids in unpivot_columns:
3429-
# row explode offset used to choose the input column
3430-
# we use offset instead of label as labels are not necessarily unique
3431-
cases = itertools.chain(
3432-
*(
3433-
(
3434-
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
3435-
ex.deref(column_mapping[id_or_null])
3436-
if (id_or_null is not None)
3437-
else ex.const(None),
3430+
col_expr: ex.Expression
3431+
if not input_ids:
3432+
col_expr = ex.const(None, dtype=bigframes.dtypes.INT_DTYPE)
3433+
else:
3434+
# row explode offset used to choose the input column
3435+
# we use offset instead of label as labels are not necessarily unique
3436+
cases = itertools.chain(
3437+
*(
3438+
(
3439+
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
3440+
ex.deref(column_mapping[id_or_null])
3441+
if (id_or_null is not None)
3442+
else ex.const(None),
3443+
)
3444+
for i, id_or_null in enumerate(input_ids)
34383445
)
3439-
for i, id_or_null in enumerate(input_ids)
34403446
)
3441-
)
3442-
col_expr = ops.case_when_op.as_expr(*cases)
3447+
col_expr = ops.case_when_op.as_expr(*cases)
34433448
unpivot_exprs.append(col_expr)
34443449

34453450
joined_array, unpivot_col_ids = joined_array.compute_values(unpivot_exprs)
@@ -3457,19 +3462,43 @@ def _pd_index_to_array_value(
34573462
Create an ArrayValue from a list of label tuples.
34583463
The last column will be row offsets.
34593464
"""
3465+
id_gen = bigframes.core.identifiers.standard_id_strings()
3466+
col_ids = [next(id_gen) for _ in range(index.nlevels)]
3467+
offset_id = next(id_gen)
3468+
34603469
rows = []
34613470
labels_as_tuples = utils.index_as_tuples(index)
34623471
for row_offset in range(len(index)):
3463-
id_gen = bigframes.core.identifiers.standard_id_strings()
34643472
row_label = labels_as_tuples[row_offset]
34653473
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
34663474
row = {}
3467-
for label_part, id in zip(row_label, id_gen):
3468-
row[id] = label_part if pd.notnull(label_part) else None
3469-
row[next(id_gen)] = row_offset
3475+
for label_part, col_id in zip(row_label, col_ids):
3476+
row[col_id] = label_part if pd.notnull(label_part) else None
3477+
row[offset_id] = row_offset
34703478
rows.append(row)
34713479

3472-
return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
3480+
if not rows:
3481+
dtypes_list = getattr(index, "dtypes", None)
3482+
if dtypes_list is None:
3483+
dtypes_list = (
3484+
[index.dtype] if hasattr(index, "dtype") else [pd.Float64Dtype()]
3485+
)
3486+
3487+
fields = []
3488+
for col_id, dtype in zip(col_ids, dtypes_list):
3489+
try:
3490+
pa_type = bigframes.dtypes.bigframes_dtype_to_arrow_dtype(dtype)
3491+
except Exception:
3492+
pa_type = pa.string()
3493+
fields.append(pa.field(col_id, pa_type))
3494+
fields.append(pa.field(offset_id, pa.int64()))
3495+
schema = pa.schema(fields)
3496+
pt = pa.Table.from_pylist([], schema=schema)
3497+
else:
3498+
pt = pa.Table.from_pylist(rows)
3499+
pt = pt.rename_columns([*col_ids, offset_id])
3500+
3501+
return core.ArrayValue.from_pyarrow(pt, session=session)
34733502

34743503

34753504
def _resolve_index_col(
Collapse file

‎tests/system/small/test_dataframe.py‎

Copy file name to clipboardExpand all lines: tests/system/small/test_dataframe.py
+13 Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5902,6 +5902,19 @@ def test_to_gbq_table_labels(scalars_df_index):
59025902
assert table.labels["test"] == "labels"
59035903

59045904

5905+
def test_to_gbq_obj_ref_persists(session):
5906+
# Test that saving and loading an Object Reference retains its dtype
5907+
bdf = session.from_glob_path(
5908+
"gs://cloud-samples-data/vision/ocr/*.jpg", name="uris"
5909+
).head(1)
5910+
5911+
destination_table = "bigframes-dev.bigframes_tests_sys.test_obj_ref_persistence"
5912+
bdf.to_gbq(destination_table, if_exists="replace")
5913+
5914+
loaded_df = session.read_gbq(destination_table)
5915+
assert loaded_df["uris"].dtype == dtypes.OBJ_REF_DTYPE
5916+
5917+
59055918
@pytest.mark.parametrize(
59065919
("col_names", "ignore_index"),
59075920
[
Collapse file

‎tests/system/small/test_multiindex.py‎

Copy file name to clipboardExpand all lines: tests/system/small/test_multiindex.py
+31 Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,3 +1490,34 @@ def test_multiindex_eq_const(scalars_df_index, scalars_pandas_df_index):
14901490
bigframes.testing.utils.assert_index_equal(
14911491
pandas.Index(pd_result, dtype="boolean"), bf_result.to_pandas()
14921492
)
1493+
1494+
1495+
def test_count_empty_multiindex_columns(session):
1496+
df = pandas.DataFrame(
1497+
[], index=[1, 2], columns=pandas.MultiIndex.from_tuples([], names=["a", "b"])
1498+
)
1499+
bdf = session.read_pandas(df)
1500+
1501+
# count() operation unpivots columns, triggering the empty MultiIndex bug internally
1502+
count_df = bdf.count()
1503+
1504+
# The local fix ensures that empty unpivoted columns generate properly typed NULLs
1505+
# rather than failing syntax validation downstream in BigQuery.
1506+
# We compile to `.sql` to verify that compilation succeeds locally, without executing the query on BigQuery.
1507+
_ = count_df.to_frame().sql
1508+
1509+
# Assert structural layout is correct
1510+
assert count_df.index.nlevels == 2
1511+
assert list(count_df.index.names) == ["a", "b"]
1512+
1513+
1514+
def test_dataframe_melt_multiindex(session):
1515+
# Tests that `melt` operations via count do not cause MultiIndex drops in Arrow
1516+
df = pandas.DataFrame({"A": [1], "B": ["string"], "C": [3]})
1517+
df.columns = pandas.MultiIndex.from_tuples(
1518+
[("Group1", "A"), ("Group2", "B"), ("Group1", "C")]
1519+
)
1520+
bdf = session.read_pandas(df)
1521+
1522+
count_df = bdf.count().to_pandas()
1523+
assert count_df.shape[0] == 3

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.