Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit cb00daa

Browse filesBrowse files
feat: Support pd.col simple aggregates (#2480)
1 parent a396f68 commit cb00daa
Copy full SHA for cb00daa

2 files changed

+80Lines changed: 80 additions & 0 deletions

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎bigframes/core/col.py‎

Copy file name to clipboardExpand all lines: bigframes/core/col.py
+29Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818

1919
import bigframes_vendored.pandas.core.col as pd_col
2020

21+
from bigframes.core import agg_expressions, window_spec
2122
import bigframes.core.expression as bf_expression
2223
import bigframes.operations as bf_ops
24+
import bigframes.operations.aggregations as agg_ops
2325

2426

2527
# Not to be confused with the Expression class in `bigframes.core.expressions`
@@ -33,6 +35,15 @@ class Expression:
3335
def _apply_unary(self, op: bf_ops.UnaryOp) -> Expression:
    """Apply a unary op to this expression.

    Builds the op's expression over ``self._value`` with ``op.as_expr`` and
    returns it wrapped in a new ``Expression``.
    """
    unary_expr = op.as_expr(self._value)
    return Expression(unary_expr)
3537

38+
def _apply_unary_agg(self, op: agg_ops.UnaryAggregateOp) -> Expression:
    """Apply a unary aggregate op to this expression over an unbound window.

    The aggregate built by ``op.as_expr`` is wrapped in a
    ``WindowExpression`` using ``window_spec.unbound()`` and returned as a
    new ``Expression``.
    """
    # Windowizing here is probably unnecessary in principle, but block APIs
    # expect pre-windowized expressions. Once col expressions can appear in
    # a windowed context, windowization will likely need to be deferred
    # rather than always applying the default unbound window.
    windowed = agg_expressions.WindowExpression(
        op.as_expr(self._value), window_spec.unbound()
    )
    return Expression(windowed)
46+
3647
def _apply_binary(self, other: Any, op: bf_ops.BinaryOp, reverse: bool = False):
3748
if isinstance(other, Expression):
3849
other_value = other._value
@@ -118,6 +129,24 @@ def __rxor__(self, other: Any) -> Expression:
118129
def __invert__(self) -> Expression:
    """Implement ``~expr`` by applying ``bf_ops.invert_op`` as a unary op."""
    return self._apply_unary(op=bf_ops.invert_op)
120131

132+
def sum(self) -> Expression:
    """Return an expression applying ``agg_ops.sum_op`` to this expression."""
    return self._apply_unary_agg(op=agg_ops.sum_op)
134+
135+
def mean(self) -> Expression:
    """Return an expression applying ``agg_ops.mean_op`` to this expression."""
    return self._apply_unary_agg(op=agg_ops.mean_op)
137+
138+
def var(self) -> Expression:
    """Return an expression applying ``agg_ops.var_op`` to this expression."""
    return self._apply_unary_agg(op=agg_ops.var_op)
140+
141+
def std(self) -> Expression:
    """Return an expression applying ``agg_ops.std_op`` to this expression."""
    return self._apply_unary_agg(op=agg_ops.std_op)
143+
144+
def min(self) -> Expression:
    """Return an expression applying ``agg_ops.min_op`` to this expression."""
    return self._apply_unary_agg(op=agg_ops.min_op)
146+
147+
def max(self) -> Expression:
    """Return an expression applying ``agg_ops.max_op`` to this expression."""
    return self._apply_unary_agg(op=agg_ops.max_op)
149+
121150

122151
def col(col_name: Hashable) -> Expression:
    """Create an ``Expression`` that refers to ``col_name``.

    The name is turned into a free variable with
    ``bf_expression.free_var`` and wrapped in an ``Expression``.
    """
    column_ref = bf_expression.free_var(col_name)
    return Expression(column_ref)
Collapse file

‎tests/unit/test_col.py‎

Copy file name to clipboardExpand all lines: tests/unit/test_col.py
+51Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,57 @@ def test_pd_col_unary_operators(scalars_dfs, op):
100100
assert_frame_equal(bf_result, pd_result)
101101

102102

103+
@pytest.mark.parametrize(
    "op",
    [
        lambda x: x.sum(),
        lambda x: x.mean(),
        lambda x: x.min(),
        lambda x: x.max(),
        lambda x: x.std(),
        lambda x: x.var(),
    ],
    ids=["sum", "mean", "min", "max", "std", "var"],
)
def test_pd_col_aggregate_op(scalars_dfs, op):
    """Compare a ``bpd.col`` aggregate with the same ``pd.col`` aggregate.

    Each parametrized ``op`` is applied to ``float64_col`` and assigned as a
    ``result`` column on both the bigframes and the pandas frame; the two
    resulting frames must be equal.
    """
    bf_df, pd_df = scalars_dfs

    # Build both expressions before running anything, as the frames are only
    # touched afterwards.
    bf_expr = op(bpd.col("float64_col"))
    pd_expr = op(pd.col("float64_col"))  # type: ignore

    bf_result = bf_df.assign(result=bf_expr).to_pandas()
    pd_result = pd_df.assign(result=pd_expr)

    assert_frame_equal(bf_result, pd_result)
136+
137+
138+
def test_pd_col_aggregate_of_aggregate(scalars_dfs):
    """Compare a nested aggregate between ``bpd.col`` and ``pd.col``.

    The expression is the mean of ``int64_col`` minus its own mean, assigned
    as a ``result`` column on both frames; the resulting frames must be
    equal.
    """
    bf_df, pd_df = scalars_dfs

    bf_expr = (bpd.col("int64_col") - bpd.col("int64_col").mean()).mean()
    pd_expr = (pd.col("int64_col") - pd.col("int64_col").mean()).mean()  # type: ignore

    bf_result = bf_df.assign(result=bf_expr).to_pandas()
    pd_result = pd_df.assign(result=pd_expr)

    assert_frame_equal(bf_result, pd_result)
152+
153+
103154
@pytest.mark.parametrize(
104155
("op",),
105156
[

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.