From c8f191852294322cd5bc01741101ce2690378df6 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Fri, 13 Sep 2024 16:42:37 +0000 Subject: [PATCH 1/2] fix: DataFrameGroupby.agg no works with unnamed tuples --- bigframes/core/groupby/__init__.py | 10 ++++------ tests/system/small/test_groupby.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 2b80d0389e..a0105f4ef0 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -414,12 +414,10 @@ def _agg_named(self, **kwargs) -> df.DataFrame: raise NotImplementedError( f"Only string aggregate names supported. {constants.FEEDBACK_LINK}" ) - if not hasattr(v, "column") or not hasattr(v, "aggfunc"): - import bigframes.pandas as bpd - - raise TypeError(f"kwargs values must be {bpd.NamedAgg.__qualname__}") - col_id = self._resolve_label(v.column) - aggregations.append((col_id, agg_ops.lookup_agg_func(v.aggfunc))) + if not isinstance(v, tuple) or (len(v) != 2): + raise TypeError("kwargs values must be 2-tuples of column, aggfunc") + col_id = self._resolve_label(v[0]) + aggregations.append((col_id, agg_ops.lookup_agg_func(v[1]))) column_labels.append(k) agg_block, _ = self._block.aggregate( by_column_ids=self._by_col_ids, diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 8e3baff4c2..a434f267c5 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -247,6 +247,26 @@ def test_dataframe_groupby_agg_named(scalars_df_index, scalars_pandas_df_index): pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) +def test_dataframe_groupby_agg_kw_tuples(scalars_df_index, scalars_pandas_df_index): + col_names = ["int64_too", "float64_col", "int64_col", "bool_col", "string_col"] + bf_result = ( + scalars_df_index[col_names] + .groupby("string_col") + .agg( + agg1=("int64_too", "sum"), + agg2=("float64_col", "max"), + ) + ) + pd_result = ( + scalars_pandas_df_index[col_names] + .groupby("string_col") + .agg(agg1=("int64_too", "sum"), agg2=("float64_col", "max")) + ) + bf_result_computed = bf_result.to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) + + @pytest.mark.parametrize( ("as_index"), [ From bb2e7cfef2a18860d8a776c8d7217c77cd374e8c Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Fri, 13 Sep 2024 19:08:52 +0000 Subject: [PATCH 2/2] add tests for invalid inputs --- tests/system/small/test_groupby.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index a434f267c5..8574860daa 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -267,6 +267,21 @@ def test_dataframe_groupby_agg_kw_tuples(scalars_df_index, scalars_pandas_df_ind pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) +@pytest.mark.parametrize( + ("kwargs"), + [ + ({"hello": "world"}), + ({"too_many_fields": ("one", "two", "three")}), + ], +) +def test_dataframe_groupby_agg_kw_error(scalars_df_index, kwargs): + col_names = ["int64_too", "float64_col", "int64_col", "bool_col", "string_col"] + with pytest.raises( + TypeError, match=r"kwargs values must be 2-tuples of column, aggfunc" + ): + (scalars_df_index[col_names].groupby("string_col").agg(**kwargs)) + + @pytest.mark.parametrize( ("as_index"), [