Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit 956a5b0

Browse files
feat: Support builtins funcs for df.agg (#2256)
1 parent 9f497a6 commit 956a5b0
Copy full SHA for 956a5b0

4 files changed

+35 -9 — Lines changed: 35 additions & 9 deletions

File tree

Expand file tree / Collapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎bigframes/core/groupby/dataframe_group_by.py‎

Copy file name to clipboard | Expand all lines: bigframes/core/groupby/dataframe_group_by.py
+5 -5 — Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -593,6 +593,7 @@ def _agg_func(self, func) -> df.DataFrame:
593593
def _agg_dict(self, func: typing.Mapping) -> df.DataFrame:
594594
aggregations: typing.List[agg_expressions.Aggregation] = []
595595
column_labels = []
596+
function_labels = []
596597

597598
want_aggfunc_level = any(utils.is_list_like(aggs) for aggs in func.values())
598599

@@ -602,8 +603,10 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame:
602603
funcs_for_id if utils.is_list_like(funcs_for_id) else [funcs_for_id]
603604
)
604605
for f in func_list:
605-
aggregations.append(aggs.agg(col_id, agg_ops.lookup_agg_func(f)[0]))
606+
f_op, f_label = agg_ops.lookup_agg_func(f)
607+
aggregations.append(aggs.agg(col_id, f_op))
606608
column_labels.append(label)
609+
function_labels.append(f_label)
607610
agg_block, _ = self._block.aggregate(
608611
by_column_ids=self._by_col_ids,
609612
aggregations=aggregations,
@@ -613,10 +616,7 @@ def _agg_dict(self, func: typing.Mapping) -> df.DataFrame:
613616
agg_block = agg_block.with_column_labels(
614617
utils.combine_indices(
615618
pd.Index(column_labels),
616-
pd.Index(
617-
typing.cast(agg_ops.AggregateOp, agg.op).name
618-
for agg in aggregations
619-
),
619+
pd.Index(function_labels),
620620
)
621621
)
622622
else:
Collapse file

‎bigframes/operations/aggregations.py‎

Copy file name to clipboard | Expand all lines: bigframes/operations/aggregations.py
+8 -2 — Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -717,9 +717,15 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
717717
np.all: all_op,
718718
np.any: any_op,
719719
np.unique: nunique_op,
720-
# TODO(b/443252872): Solve
721-
# list: ArrayAggOp(),
722720
np.size: size_op,
721+
# TODO(b/443252872): Solve
722+
list: ArrayAggOp(),
723+
len: size_op,
724+
sum: sum_op,
725+
min: min_op,
726+
max: max_op,
727+
any: any_op,
728+
all: all_op,
723729
}
724730

725731

Collapse file

‎tests/system/small/test_dataframe.py‎

Copy file name to clipboard | Expand all lines: tests/system/small/test_dataframe.py
+22 — Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6151,6 +6151,28 @@ def test_agg_with_dict_strs(scalars_dfs):
61516151
)
61526152

61536153

6154+
def test_df_agg_with_builtins(scalars_dfs):
6155+
bf_df, pd_df = scalars_dfs
6156+
6157+
bf_result = (
6158+
bf_df[["int64_col", "bool_col"]]
6159+
.dropna()
6160+
.groupby(bf_df.int64_too % 2)
6161+
.agg({"int64_col": [len, sum, min, max, list], "bool_col": [all, any, max]})
6162+
.to_pandas()
6163+
)
6164+
pd_result = (
6165+
pd_df[["int64_col", "bool_col"]]
6166+
.dropna()
6167+
.groupby(pd_df.int64_too % 2)
6168+
.agg({"int64_col": [len, sum, min, max, list], "bool_col": [all, any, max]})
6169+
)
6170+
6171+
pd.testing.assert_frame_equal(
6172+
bf_result, pd_result, check_dtype=False, check_index_type=False
6173+
)
6174+
6175+
61546176
def test_agg_with_dict_containing_non_existing_col_raise_key_error(scalars_dfs):
61556177
bf_df, _ = scalars_dfs
61566178
agg_funcs = {
Collapse file

‎tests/system/small/test_groupby.py‎

Copy file name to clipboard | Expand all lines: tests/system/small/test_groupby.py
-2 — Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -282,8 +282,6 @@ def test_dataframe_groupby_agg_dict_with_list(
282282
)
283283
bf_result_computed = bf_result.to_pandas()
284284

285-
# some inconsistency between versions, so normalize to bigframes behavior
286-
pd_result = pd_result.rename({"amax": "max"}, axis="columns")
287285
pd.testing.assert_frame_equal(
288286
pd_result, bf_result_computed, check_dtype=False, check_index_type=False
289287
)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.