Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit 1274167

Browse files
feat: Add bigframes.pandas.col with basic operators (#2405)
1 parent 4e2689a commit 1274167
Copy full SHA for 1274167

8 files changed

+385-17Lines changed: 385 additions & 17 deletions

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎bigframes/core/agg_expressions.py‎

Copy file name to clipboardExpand all lines: bigframes/core/agg_expressions.py
+5-5Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import functools
2020
import itertools
2121
import typing
22-
from typing import Callable, Mapping, Tuple, TypeVar
22+
from typing import Callable, Hashable, Mapping, Tuple, TypeVar
2323

2424
from bigframes import dtypes
2525
from bigframes.core import expression, window_spec
@@ -68,7 +68,7 @@ def children(self) -> Tuple[expression.Expression, ...]:
6868
return self.inputs
6969

7070
@property
71-
def free_variables(self) -> typing.Tuple[str, ...]:
71+
def free_variables(self) -> typing.Tuple[Hashable, ...]:
7272
return tuple(
7373
itertools.chain.from_iterable(map(lambda x: x.free_variables, self.inputs))
7474
)
@@ -92,7 +92,7 @@ def transform_children(
9292

9393
def bind_variables(
9494
self: TExpression,
95-
bindings: Mapping[str, expression.Expression],
95+
bindings: Mapping[Hashable, expression.Expression],
9696
allow_partial_bindings: bool = False,
9797
) -> TExpression:
9898
return self.transform_children(
@@ -192,7 +192,7 @@ def children(self) -> Tuple[expression.Expression, ...]:
192192
return self.inputs
193193

194194
@property
195-
def free_variables(self) -> typing.Tuple[str, ...]:
195+
def free_variables(self) -> typing.Tuple[Hashable, ...]:
196196
return tuple(
197197
itertools.chain.from_iterable(map(lambda x: x.free_variables, self.inputs))
198198
)
@@ -216,7 +216,7 @@ def transform_children(
216216

217217
def bind_variables(
218218
self: WindowExpression,
219-
bindings: Mapping[str, expression.Expression],
219+
bindings: Mapping[Hashable, expression.Expression],
220220
allow_partial_bindings: bool = False,
221221
) -> WindowExpression:
222222
return self.transform_children(
Collapse file

‎bigframes/core/col.py‎

Copy file name to clipboard
+126Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from __future__ import annotations
15+
16+
import dataclasses
17+
from typing import Any, Hashable
18+
19+
import bigframes_vendored.pandas.core.col as pd_col
20+
21+
import bigframes.core.expression as bf_expression
22+
import bigframes.operations as bf_ops
23+
24+
25+
# Not to be confused with the Expression class in `bigframes.core.expression`
26+
# Name collision unintended
27+
@dataclasses.dataclass(frozen=True)
class Expression:
    # The documentation is taken from the vendored pandas definition. A literal
    # class docstring is intentionally omitted because this assignment would
    # replace it anyway.
    __doc__ = pd_col.Expression.__doc__

    # The wrapped bigframes expression that the operators below build upon.
    _value: bf_expression.Expression

    def _apply_unary(self, op: bf_ops.UnaryOp) -> Expression:
        """Return a new ``Expression`` wrapping ``op`` applied to this value."""
        applied = op.as_expr(self._value)
        return Expression(applied)

    def _apply_binary(self, other: Any, op: bf_ops.BinaryOp, reverse: bool = False):
        """Combine this expression with ``other`` through the binary ``op``.

        Args:
            other: Another ``Expression``, whose wrapped value is used
                directly, or any other object, which is handed to
                ``bf_expression.const``.
            op: Operation whose ``as_expr`` builds the combined expression.
            reverse: When true, ``other`` is placed on the left-hand side.
                Used by the reflected (``__r*__``) operators.

        Returns:
            A new ``Expression`` wrapping the combined expression.
        """
        rhs = (
            other._value
            if isinstance(other, Expression)
            else bf_expression.const(other)
        )
        lhs = self._value
        if reverse:
            # Reflected operator: the foreign operand goes first.
            lhs, rhs = rhs, lhs
        return Expression(op.as_expr(lhs, rhs))

    # --- Arithmetic operators -------------------------------------------

    def __add__(self, other: Any) -> Expression:
        """Handle ``self + other`` with ``add_op``."""
        return self._apply_binary(other, bf_ops.add_op)

    def __radd__(self, other: Any) -> Expression:
        """Handle ``other + self`` with ``add_op``."""
        return self._apply_binary(other, bf_ops.add_op, reverse=True)

    def __sub__(self, other: Any) -> Expression:
        """Handle ``self - other`` with ``sub_op``."""
        return self._apply_binary(other, bf_ops.sub_op)

    def __rsub__(self, other: Any) -> Expression:
        """Handle ``other - self`` with ``sub_op``."""
        return self._apply_binary(other, bf_ops.sub_op, reverse=True)

    def __mul__(self, other: Any) -> Expression:
        """Handle ``self * other`` with ``mul_op``."""
        return self._apply_binary(other, bf_ops.mul_op)

    def __rmul__(self, other: Any) -> Expression:
        """Handle ``other * self`` with ``mul_op``."""
        return self._apply_binary(other, bf_ops.mul_op, reverse=True)

    def __truediv__(self, other: Any) -> Expression:
        """Handle ``self / other`` with ``div_op``."""
        return self._apply_binary(other, bf_ops.div_op)

    def __rtruediv__(self, other: Any) -> Expression:
        """Handle ``other / self`` with ``div_op``."""
        return self._apply_binary(other, bf_ops.div_op, reverse=True)

    def __floordiv__(self, other: Any) -> Expression:
        """Handle ``self // other`` with ``floordiv_op``."""
        return self._apply_binary(other, bf_ops.floordiv_op)

    def __rfloordiv__(self, other: Any) -> Expression:
        """Handle ``other // self`` with ``floordiv_op``."""
        return self._apply_binary(other, bf_ops.floordiv_op, reverse=True)

    # --- Comparison operators -------------------------------------------
    # These return a new ``Expression`` rather than a ``bool``.

    def __ge__(self, other: Any) -> Expression:
        """Handle ``self >= other`` with ``ge_op``."""
        return self._apply_binary(other, bf_ops.ge_op)

    def __gt__(self, other: Any) -> Expression:
        """Handle ``self > other`` with ``gt_op``."""
        return self._apply_binary(other, bf_ops.gt_op)

    def __le__(self, other: Any) -> Expression:
        """Handle ``self <= other`` with ``le_op``."""
        return self._apply_binary(other, bf_ops.le_op)

    def __lt__(self, other: Any) -> Expression:
        """Handle ``self < other`` with ``lt_op``."""
        return self._apply_binary(other, bf_ops.lt_op)

    def __eq__(self, other: object) -> Expression:  # type: ignore
        """Handle ``self == other`` with ``eq_op``."""
        return self._apply_binary(other, bf_ops.eq_op)

    def __ne__(self, other: object) -> Expression:  # type: ignore
        """Handle ``self != other`` with ``ne_op``."""
        return self._apply_binary(other, bf_ops.ne_op)

    # --- Modulo -----------------------------------------------------------

    def __mod__(self, other: Any) -> Expression:
        """Handle ``self % other`` with ``mod_op``."""
        return self._apply_binary(other, bf_ops.mod_op)

    def __rmod__(self, other: Any) -> Expression:
        """Handle ``other % self`` with ``mod_op``."""
        return self._apply_binary(other, bf_ops.mod_op, reverse=True)

    # --- Operators spelled &, |, ^ and ~ ----------------------------------

    def __and__(self, other: Any) -> Expression:
        """Handle ``self & other`` with ``and_op``."""
        return self._apply_binary(other, bf_ops.and_op)

    def __rand__(self, other: Any) -> Expression:
        """Handle ``other & self`` with ``and_op``."""
        return self._apply_binary(other, bf_ops.and_op, reverse=True)

    def __or__(self, other: Any) -> Expression:
        """Handle ``self | other`` with ``or_op``."""
        return self._apply_binary(other, bf_ops.or_op)

    def __ror__(self, other: Any) -> Expression:
        """Handle ``other | self`` with ``or_op``."""
        return self._apply_binary(other, bf_ops.or_op, reverse=True)

    def __xor__(self, other: Any) -> Expression:
        """Handle ``self ^ other`` with ``xor_op``."""
        return self._apply_binary(other, bf_ops.xor_op)

    def __rxor__(self, other: Any) -> Expression:
        """Handle ``other ^ self`` with ``xor_op``."""
        return self._apply_binary(other, bf_ops.xor_op, reverse=True)

    def __invert__(self) -> Expression:
        """Handle ``~self`` with ``invert_op``."""
        return self._apply_unary(bf_ops.invert_op)
120+
121+
122+
def col(col_name: Hashable) -> Expression:
    # No literal docstring here: the assignment after this function supplies
    # the documentation from the vendored pandas module.
    unbound_variable = bf_expression.free_var(col_name)
    return Expression(unbound_variable)


# Reuse the vendored pandas documentation for the public ``col`` entry point.
col.__doc__ = pd_col.col.__doc__
Collapse file

‎bigframes/core/expression.py‎

Copy file name to clipboardExpand all lines: bigframes/core/expression.py
+21-11Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import functools
2020
import itertools
2121
import typing
22-
from typing import Callable, Generator, Mapping, TypeVar, Union
22+
from typing import Callable, Generator, Hashable, Mapping, TypeVar, Union
2323

2424
import pandas as pd
2525

@@ -39,7 +39,7 @@ def deref(name: str) -> DerefOp:
3939
return DerefOp(ids.ColumnId(name))
4040

4141

42-
def free_var(id: str) -> UnboundVariableExpression:
42+
def free_var(id: Hashable) -> UnboundVariableExpression:
4343
return UnboundVariableExpression(id)
4444

4545

@@ -52,7 +52,7 @@ class Expression(abc.ABC):
5252
"""An expression represents a computation taking N scalar inputs and producing a single output scalar."""
5353

5454
@property
55-
def free_variables(self) -> typing.Tuple[str, ...]:
55+
def free_variables(self) -> typing.Tuple[Hashable, ...]:
5656
return ()
5757

5858
@property
@@ -116,7 +116,9 @@ def bind_refs(
116116

117117
@abc.abstractmethod
118118
def bind_variables(
119-
self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False
119+
self,
120+
bindings: Mapping[Hashable, Expression],
121+
allow_partial_bindings: bool = False,
120122
) -> Expression:
121123
"""Replace variables with expression given in `bindings`.
122124
@@ -191,7 +193,9 @@ def output_type(self) -> dtypes.ExpressionType:
191193
return self.dtype
192194

193195
def bind_variables(
194-
self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False
196+
self,
197+
bindings: Mapping[Hashable, Expression],
198+
allow_partial_bindings: bool = False,
195199
) -> Expression:
196200
return self
197201

@@ -226,10 +230,10 @@ def transform_children(self, t: Callable[[Expression], Expression]) -> Expressio
226230
class UnboundVariableExpression(Expression):
227231
"""A variable expression representing an unbound variable."""
228232

229-
id: str
233+
id: Hashable
230234

231235
@property
232-
def free_variables(self) -> typing.Tuple[str, ...]:
236+
def free_variables(self) -> typing.Tuple[Hashable, ...]:
233237
return (self.id,)
234238

235239
@property
@@ -256,7 +260,9 @@ def bind_refs(
256260
return self
257261

258262
def bind_variables(
259-
self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False
263+
self,
264+
bindings: Mapping[Hashable, Expression],
265+
allow_partial_bindings: bool = False,
260266
) -> Expression:
261267
if self.id in bindings.keys():
262268
return bindings[self.id]
@@ -304,7 +310,9 @@ def output_type(self) -> dtypes.ExpressionType:
304310
raise ValueError(f"Type of variable {self.id} has not been fixed.")
305311

306312
def bind_variables(
307-
self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False
313+
self,
314+
bindings: Mapping[Hashable, Expression],
315+
allow_partial_bindings: bool = False,
308316
) -> Expression:
309317
return self
310318

@@ -373,7 +381,7 @@ def column_references(
373381
)
374382

375383
@property
376-
def free_variables(self) -> typing.Tuple[str, ...]:
384+
def free_variables(self) -> typing.Tuple[Hashable, ...]:
377385
return tuple(
378386
itertools.chain.from_iterable(map(lambda x: x.free_variables, self.inputs))
379387
)
@@ -408,7 +416,9 @@ def output_type(self) -> dtypes.ExpressionType:
408416
return self.op.output_type(*input_types)
409417

410418
def bind_variables(
411-
self, bindings: Mapping[str, Expression], allow_partial_bindings: bool = False
419+
self,
420+
bindings: Mapping[Hashable, Expression],
421+
allow_partial_bindings: bool = False,
412422
) -> OpExpression:
413423
return OpExpression(
414424
self.op,
Collapse file

‎bigframes/dataframe.py‎

Copy file name to clipboardExpand all lines: bigframes/dataframe.py
+15-1Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
from bigframes.core import agg_expressions
5959
import bigframes.core.block_transforms as block_ops
6060
import bigframes.core.blocks as blocks
61+
import bigframes.core.col
6162
import bigframes.core.convert
6263
import bigframes.core.explode
6364
import bigframes.core.expression as ex
@@ -94,7 +95,13 @@
9495
import bigframes.session
9596

9697
SingleItemValue = Union[
97-
bigframes.series.Series, int, float, str, pandas.Timedelta, Callable
98+
bigframes.series.Series,
99+
int,
100+
float,
101+
str,
102+
pandas.Timedelta,
103+
Callable,
104+
bigframes.core.col.Expression,
98105
]
99106
MultiItemValue = Union[
100107
"DataFrame", Sequence[int | float | str | pandas.Timedelta | Callable]
@@ -2236,6 +2243,13 @@ def _assign_single_item(
22362243
) -> DataFrame:
22372244
if isinstance(v, bigframes.series.Series):
22382245
return self._assign_series_join_on_index(k, v)
2246+
elif isinstance(v, bigframes.core.col.Expression):
2247+
label_to_col_ref = {
2248+
label: ex.deref(id) for id, label in self._block.col_id_to_label.items()
2249+
}
2250+
resolved_expr = v._value.bind_variables(label_to_col_ref)
2251+
block = self._block.project_block_exprs([resolved_expr], labels=[k])
2252+
return DataFrame(block)
22392253
elif isinstance(v, bigframes.dataframe.DataFrame):
22402254
v_df_col_count = len(v._block.value_columns)
22412255
if v_df_col_count != 1:
Collapse file

‎bigframes/pandas/__init__.py‎

Copy file name to clipboardExpand all lines: bigframes/pandas/__init__.py
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import pandas
2828

2929
import bigframes._config as config
30+
from bigframes.core.col import col
3031
import bigframes.core.global_session as global_session
3132
import bigframes.core.indexes
3233
from bigframes.core.logging import log_adapter
@@ -415,6 +416,7 @@ def reset_session():
415416
"clean_up_by_session_id",
416417
"concat",
417418
"crosstab",
419+
"col",
418420
"cut",
419421
"deploy_remote_function",
420422
"deploy_udf",

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.