diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 18cb83fa18..a8b8afdae7 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -179,6 +179,9 @@ def any(self) -> df.DataFrame: def count(self) -> df.DataFrame: return self._aggregate_all(agg_ops.count_op) + def nunique(self) -> df.DataFrame: + return self._aggregate_all(agg_ops.nunique_op) + def cumsum(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame: if not numeric_only: self._raise_on_non_numeric("cumsum") @@ -442,6 +445,9 @@ def max(self, *args) -> series.Series: def count(self) -> series.Series: return self._aggregate(agg_ops.count_op) + def nunique(self) -> series.Series: + return self._aggregate(agg_ops.nunique_op) + def sum(self, *args) -> series.Series: return self._aggregate(agg_ops.sum_op) diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index a24713c2b3..5214905186 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -69,11 +69,13 @@ def test_dataframe_groupby_median(scalars_df_index, scalars_pandas_df_index): ("operator"), [ (lambda x: x.count()), + (lambda x: x.nunique()), (lambda x: x.any()), (lambda x: x.all()), ], ids=[ "count", + "nunique", "any", "all", ], diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index b05319b4f7..8730cf0007 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -363,6 +363,15 @@ def agg(self, func): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def nunique(self): + """ + Return number of unique elements in the group. + + Returns: + Series: Number of unique values within each group. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + class DataFrameGroupBy(GroupBy): def agg(self, func, **kwargs): @@ -391,3 +400,12 @@ def agg(self, func, **kwargs): DataFrame """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def nunique(self): + """ + Return DataFrame with counts of unique elements in each position. + + Returns: + DataFrame + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)