googleapis · TrevorBergeron · Nov 12, 2025 · Nov 11, 2025 · Nov 12, 2025
@@ -593,6 +593,7 @@ def _agg_func(self, func) -> df.DataFrame:
    def _agg_dict(self, func: typing.Mapping) -> df.DataFrame:
        aggregations: typing.List[agg_expressions.Aggregation] = []
        column_labels = []
+        function_labels = []

        want_aggfunc_level = any(utils.is_list_like(aggs) for aggs in func.values())

@@ -602,8 +603,10 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame:
                funcs_for_id if utils.is_list_like(funcs_for_id) else [funcs_for_id]
            )
            for f in func_list:
-                aggregations.append(aggs.agg(col_id, agg_ops.lookup_agg_func(f)[0]))
+                f_op, f_label = agg_ops.lookup_agg_func(f)
+                aggregations.append(aggs.agg(col_id, f_op))
                column_labels.append(label)
+                function_labels.append(f_label)
        agg_block, _ = self._block.aggregate(
            by_column_ids=self._by_col_ids,
            aggregations=aggregations,
@@ -613,10 +616,7 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame:
            agg_block = agg_block.with_column_labels(
                utils.combine_indices(
                    pd.Index(column_labels),
-                    pd.Index(
-                        typing.cast(agg_ops.AggregateOp, agg.op).name
-                        for agg in aggregations
-                    ),
+                    pd.Index(function_labels),
                )
            )
        else:

@@ -717,9 +717,15 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
    np.all: all_op,
    np.any: any_op,
    np.unique: nunique_op,
-    # TODO(b/443252872): Solve
-    # list: ArrayAggOp(),
    np.size: size_op,
+    # TODO(b/443252872): Solve. NOTE(review): `list` is now mapped below; confirm what remains open.
+    list: ArrayAggOp(),
+    len: size_op,
+    sum: sum_op,
+    min: min_op,
+    max: max_op,
+    any: any_op,
+    all: all_op,
 }



@@ -6151,6 +6151,28 @@ def test_agg_with_dict_strs(scalars_dfs):
    )


+def test_df_agg_with_builtins(scalars_dfs):
+    """Check grouped dict aggregation with Python builtin callables against pandas.
+
+    The same column selection, grouping key and aggregation spec are applied
+    to both frames; results are compared ignoring dtype and index type.
+    """
+    bf_df, pd_df = scalars_dfs
+    value_cols = ["int64_col", "bool_col"]
+    # The aggregations are the builtin callables themselves, not string names.
+    builtin_aggs = {
+        "int64_col": [len, sum, min, max, list],
+        "bool_col": [all, any, max],
+    }
+
+    bf_grouped = bf_df[value_cols].dropna().groupby(bf_df.int64_too % 2)
+    bf_result = bf_grouped.agg(builtin_aggs).to_pandas()
+
+    pd_grouped = pd_df[value_cols].dropna().groupby(pd_df.int64_too % 2)
+    pd_result = pd_grouped.agg(builtin_aggs)
+
+    pd.testing.assert_frame_equal(
+        bf_result, pd_result, check_dtype=False, check_index_type=False
+    )
+
+
 def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
    bf_df, _ = scalars_dfs
    agg_funcs = {

@@ -282,8 +282,6 @@ def test_dataframe_groupby_agg_dict_with_list(
    )
    bf_result_computed = bf_result.to_pandas()

-    # some inconsistency between versions, so normalize to bigframes behavior
-    pd_result = pd_result.rename({"amax": "max"}, axis="columns")
    pd.testing.assert_frame_equal(
        pd_result, bf_result_computed, check_dtype=False, check_index_type=False
    )