Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.
69 changes: 49 additions & 20 deletions 69 bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1822,9 +1822,9 @@ def melt(
Arguments correspond to pandas.melt arguments.
"""
# TODO: Implement col_level and ignore_index
value_labels: pd.Index = pd.Index(
[self.col_id_to_label[col_id] for col_id in value_vars]
)
value_labels: pd.Index = self.column_labels[
[self.value_columns.index(col_id) for col_id in value_vars]
]
id_labels = [self.col_id_to_label[col_id] for col_id in id_vars]

unpivot_expr, (var_col_ids, unpivot_out, passthrough_cols) = unpivot(
Expand Down Expand Up @@ -3417,6 +3417,7 @@ def unpivot(
joined_array, (labels_mapping, column_mapping) = labels_array.relational_join(
array_value, type="cross"
)

new_passthrough_cols = [column_mapping[col] for col in passthrough_columns]
# Last column is offsets
index_col_ids = [labels_mapping[col] for col in labels_array.column_ids[:-1]]
Expand All @@ -3426,20 +3427,24 @@ def unpivot(
unpivot_exprs: List[ex.Expression] = []
# Supports producing multiple stacked output columns when stacking only part of a hierarchical index
for input_ids in unpivot_columns:
# row explode offset used to choose the input column
# we use offset instead of label as labels are not necessarily unique
cases = itertools.chain(
*(
(
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
ex.deref(column_mapping[id_or_null])
if (id_or_null is not None)
else ex.const(None),
col_expr: ex.Expression
if not input_ids:
col_expr = ex.const(None, dtype=bigframes.dtypes.INT_DTYPE)
else:
# row explode offset used to choose the input column
# we use offset instead of label as labels are not necessarily unique
cases = itertools.chain(
*(
(
ops.eq_op.as_expr(explode_offsets_id, ex.const(i)),
ex.deref(column_mapping[id_or_null])
if (id_or_null is not None)
else ex.const(None),
)
for i, id_or_null in enumerate(input_ids)
)
for i, id_or_null in enumerate(input_ids)
)
)
col_expr = ops.case_when_op.as_expr(*cases)
col_expr = ops.case_when_op.as_expr(*cases)
unpivot_exprs.append(col_expr)

joined_array, unpivot_col_ids = joined_array.compute_values(unpivot_exprs)
Expand All @@ -3457,19 +3462,43 @@ def _pd_index_to_array_value(
Create an ArrayValue from a list of label tuples.
The last column will be row offsets.
"""
id_gen = bigframes.core.identifiers.standard_id_strings()
col_ids = [next(id_gen) for _ in range(index.nlevels)]
offset_id = next(id_gen)

rows = []
labels_as_tuples = utils.index_as_tuples(index)
for row_offset in range(len(index)):
id_gen = bigframes.core.identifiers.standard_id_strings()
row_label = labels_as_tuples[row_offset]
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
row = {}
for label_part, id in zip(row_label, id_gen):
row[id] = label_part if pd.notnull(label_part) else None
row[next(id_gen)] = row_offset
for label_part, col_id in zip(row_label, col_ids):
row[col_id] = label_part if pd.notnull(label_part) else None
row[offset_id] = row_offset
rows.append(row)

return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
if not rows:
dtypes_list = getattr(index, "dtypes", None)
if dtypes_list is None:
dtypes_list = (
[index.dtype] if hasattr(index, "dtype") else [pd.Float64Dtype()]
)

fields = []
for col_id, dtype in zip(col_ids, dtypes_list):
try:
pa_type = bigframes.dtypes.bigframes_dtype_to_arrow_dtype(dtype)
except Exception:
pa_type = pa.string()
fields.append(pa.field(col_id, pa_type))
fields.append(pa.field(offset_id, pa.int64()))
schema = pa.schema(fields)
pt = pa.Table.from_pylist([], schema=schema)
else:
pt = pa.Table.from_pylist(rows)
pt = pt.rename_columns([*col_ids, offset_id])

return core.ArrayValue.from_pyarrow(pt, session=session)


def _resolve_index_col(
Expand Down
13 changes: 13 additions & 0 deletions 13 tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5902,6 +5902,19 @@ def test_to_gbq_table_labels(scalars_df_index):
assert table.labels["test"] == "labels"


def test_to_gbq_obj_ref_persists(session):
    """Check that an object-reference column keeps its dtype across a table round trip.

    A frame is built from a GCS glob path with its column named ``uris``,
    trimmed with ``head(1)``, written out with ``to_gbq`` and read back with
    ``read_gbq``. The reloaded ``uris`` column must report
    ``dtypes.OBJ_REF_DTYPE``.
    """
    table_id = "bigframes-dev.bigframes_tests_sys.test_obj_ref_persistence"

    globbed = session.from_glob_path(
        "gs://cloud-samples-data/vision/ocr/*.jpg", name="uris"
    )
    first_row = globbed.head(1)

    # The destination name is fixed, so the write is issued with
    # if_exists="replace" on every run.
    first_row.to_gbq(table_id, if_exists="replace")

    round_tripped = session.read_gbq(table_id)
    uris_dtype = round_tripped["uris"].dtype
    assert uris_dtype == dtypes.OBJ_REF_DTYPE


@pytest.mark.parametrize(
("col_names", "ignore_index"),
[
Expand Down
31 changes: 31 additions & 0 deletions 31 tests/system/small/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1490,3 +1490,34 @@ def test_multiindex_eq_const(scalars_df_index, scalars_pandas_df_index):
bigframes.testing.utils.assert_index_equal(
pandas.Index(pd_result, dtype="boolean"), bf_result.to_pandas()
)


def test_count_empty_multiindex_columns(session):
    """Check ``count()`` on a frame whose two-level column index has no entries.

    The input has two rows and zero columns, with column levels named ``a``
    and ``b``. The test checks that SQL can be generated for the result and
    that the result's index still has both named levels.
    """
    empty_columns = pandas.MultiIndex.from_tuples([], names=["a", "b"])
    local_frame = pandas.DataFrame([], index=[1, 2], columns=empty_columns)

    # count() unpivots the columns, which is the code path that previously
    # failed for an empty MultiIndex.
    counts = session.read_pandas(local_frame).count()

    # Reading `.sql` only compiles the query; the value is discarded. The
    # empty unpivoted column is expected to compile to a properly typed NULL
    # rather than fail validation later in BigQuery.
    _ = counts.to_frame().sql

    # The structural layout of the result must match the column levels.
    level_count = counts.index.nlevels
    assert level_count == 2
    level_names = list(counts.index.names)
    assert level_names == ["a", "b"]


def test_dataframe_melt_multiindex(session):
    """Check that ``count()`` returns one row per column for two-level columns.

    The frame has three columns of mixed dtypes under a two-level column
    index. The count result (produced through the melt/unpivot path) must
    have three rows once materialized to pandas.
    """
    column_labels = pandas.MultiIndex.from_tuples(
        [("Group1", "A"), ("Group2", "B"), ("Group1", "C")]
    )
    local_frame = pandas.DataFrame({"A": [1], "B": ["string"], "C": [3]})
    local_frame.columns = column_labels

    counted = session.read_pandas(local_frame).count().to_pandas()

    # One output row per input column; none may be dropped.
    row_total = counted.shape[0]
    assert row_total == 3
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.