diff --git a/bigframes/core/rewrite/timedeltas.py b/bigframes/core/rewrite/timedeltas.py index db3a426635..9d52eae77d 100644 --- a/bigframes/core/rewrite/timedeltas.py +++ b/bigframes/core/rewrite/timedeltas.py @@ -103,6 +103,17 @@ def _rewrite_op_expr( if isinstance(expr.op, ops.AddOp): return _rewrite_add_op(inputs[0], inputs[1]) + if isinstance(expr.op, ops.MulOp): + return _rewrite_mul_op(inputs[0], inputs[1]) + + if isinstance(expr.op, ops.DivOp): + return _rewrite_div_op(inputs[0], inputs[1]) + + if isinstance(expr.op, ops.FloorDivOp): + # We need to re-write floor div because for numerics: int // float => float + # but for timedeltas: int(timedelta) // float => int(timedelta) + return _rewrite_floordiv_op(inputs[0], inputs[1]) + return _TypedExpr.create_op_expr(expr.op, *inputs) @@ -126,3 +137,32 @@ def _rewrite_add_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: return _TypedExpr.create_op_expr(ops.timestamp_add_op, right, left) return _TypedExpr.create_op_expr(ops.add_op, left, right) + + +def _rewrite_mul_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: + result = _TypedExpr.create_op_expr(ops.mul_op, left, right) + + if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): + return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result) + if dtypes.is_numeric(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE: + return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result) + + return result + + +def _rewrite_div_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: + result = _TypedExpr.create_op_expr(ops.div_op, left, right) + + if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): + return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result) + + return result + + +def _rewrite_floordiv_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: + result = _TypedExpr.create_op_expr(ops.floordiv_op, left, right) + + if left.dtype is dtypes.TIMEDELTA_DTYPE and 
dtypes.is_numeric(right.dtype): + return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result) + + return result diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 21a1171ddc..f2bc1ecf85 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -115,15 +115,18 @@ cos_op, cosh_op, div_op, + DivOp, exp_op, expm1_op, floor_op, floordiv_op, + FloorDivOp, ln_op, log1p_op, log10_op, mod_op, mul_op, + MulOp, neg_op, pos_op, pow_op, @@ -282,15 +285,18 @@ "cos_op", "cosh_op", "div_op", + "DivOp", "exp_op", "expm1_op", "floor_op", "floordiv_op", + "FloorDivOp", "ln_op", "log1p_op", "log10_op", "mod_op", "mul_op", + "MulOp", "neg_op", "pos_op", "pow_op", diff --git a/bigframes/operations/numeric_ops.py b/bigframes/operations/numeric_ops.py index 61544984fb..f5a290bde5 100644 --- a/bigframes/operations/numeric_ops.py +++ b/bigframes/operations/numeric_ops.py @@ -75,11 +75,17 @@ name="ceil", type_signature=op_typing.UNARY_REAL_NUMERIC ) -abs_op = base_ops.create_unary_op(name="abs", type_signature=op_typing.UNARY_NUMERIC) +abs_op = base_ops.create_unary_op( + name="abs", type_signature=op_typing.UNARY_NUMERIC_AND_TIMEDELTA +) -pos_op = base_ops.create_unary_op(name="pos", type_signature=op_typing.UNARY_NUMERIC) +pos_op = base_ops.create_unary_op( + name="pos", type_signature=op_typing.UNARY_NUMERIC_AND_TIMEDELTA +) -neg_op = base_ops.create_unary_op(name="neg", type_signature=op_typing.UNARY_NUMERIC) +neg_op = base_ops.create_unary_op( + name="neg", type_signature=op_typing.UNARY_NUMERIC_AND_TIMEDELTA +) exp_op = base_ops.create_unary_op( name="exp", type_signature=op_typing.UNARY_REAL_NUMERIC @@ -123,6 +129,9 @@ def output_type(self, *input_types): if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like(right_type): return right_type + if left_type is dtypes.TIMEDELTA_DTYPE and right_type is dtypes.TIMEDELTA_DTYPE: + return dtypes.TIMEDELTA_DTYPE + if (left_type is None or 
dtypes.is_numeric(left_type)) and ( right_type is None or dtypes.is_numeric(right_type) ): @@ -142,11 +151,6 @@ class SubOp(base_ops.BinaryOp): def output_type(self, *input_types): left_type = input_types[0] right_type = input_types[1] - if (left_type is None or dtypes.is_numeric(left_type)) and ( - right_type is None or dtypes.is_numeric(right_type) - ): - # Numeric subtraction - return dtypes.coerce_to_common(left_type, right_type) if dtypes.is_datetime_like(left_type) and dtypes.is_datetime_like(right_type): return dtypes.TIMEDELTA_DTYPE @@ -154,20 +158,95 @@ def output_type(self, *input_types): if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE: return left_type + if left_type is dtypes.TIMEDELTA_DTYPE and right_type is dtypes.TIMEDELTA_DTYPE: + return dtypes.TIMEDELTA_DTYPE + + if (left_type is None or dtypes.is_numeric(left_type)) and ( + right_type is None or dtypes.is_numeric(right_type) + ): + # Numeric subtraction + return dtypes.coerce_to_common(left_type, right_type) + raise TypeError(f"Cannot subtract dtypes {left_type} and {right_type}") sub_op = SubOp() -mul_op = base_ops.create_binary_op(name="mul", type_signature=op_typing.BINARY_NUMERIC) -div_op = base_ops.create_binary_op( - name="div", type_signature=op_typing.BINARY_REAL_NUMERIC -) +@dataclasses.dataclass(frozen=True) +class MulOp(base_ops.BinaryOp): + name: typing.ClassVar[str] = "mul" -floordiv_op = base_ops.create_binary_op( - name="floordiv", type_signature=op_typing.BINARY_NUMERIC -) + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + left_type = input_types[0] + right_type = input_types[1] + + if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right_type): + return dtypes.TIMEDELTA_DTYPE + if dtypes.is_numeric(left_type) and right_type is dtypes.TIMEDELTA_DTYPE: + return dtypes.TIMEDELTA_DTYPE + + if (left_type is None or dtypes.is_numeric(left_type)) and ( + right_type is None or dtypes.is_numeric(right_type) + ): 
+ return dtypes.coerce_to_common(left_type, right_type) + + raise TypeError(f"Cannot multiply dtypes {left_type} and {right_type}") + + +mul_op = MulOp() + + +@dataclasses.dataclass(frozen=True) +class DivOp(base_ops.BinaryOp): + name: typing.ClassVar[str] = "div" + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + left_type = input_types[0] + right_type = input_types[1] + + if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right_type): + return dtypes.TIMEDELTA_DTYPE + + if left_type is dtypes.TIMEDELTA_DTYPE and right_type is dtypes.TIMEDELTA_DTYPE: + return dtypes.FLOAT_DTYPE + + if (left_type is None or dtypes.is_numeric(left_type)) and ( + right_type is None or dtypes.is_numeric(right_type) + ): + lcd_type = dtypes.coerce_to_common(left_type, right_type) + # Real numeric ops produce floats on int input + return dtypes.FLOAT_DTYPE if lcd_type == dtypes.INT_DTYPE else lcd_type + + raise TypeError(f"Cannot divide dtypes {left_type} and {right_type}") + + +div_op = DivOp() + + +@dataclasses.dataclass(frozen=True) +class FloorDivOp(base_ops.BinaryOp): + name: typing.ClassVar[str] = "floordiv" + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + left_type = input_types[0] + right_type = input_types[1] + + if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right_type): + return dtypes.TIMEDELTA_DTYPE + + if left_type is dtypes.TIMEDELTA_DTYPE and right_type is dtypes.TIMEDELTA_DTYPE: + return dtypes.INT_DTYPE + + if (left_type is None or dtypes.is_numeric(left_type)) and ( + right_type is None or dtypes.is_numeric(right_type) + ): + return dtypes.coerce_to_common(left_type, right_type) + + raise TypeError(f"Cannot floor divide dtypes {left_type} and {right_type}") + + +floordiv_op = FloorDivOp() pow_op = base_ops.create_binary_op(name="pow", type_signature=op_typing.BINARY_NUMERIC) diff --git a/bigframes/operations/timedelta_ops.py b/bigframes/operations/timedelta_ops.py 
index 3d3c3bfeeb..689966e21b 100644 --- a/bigframes/operations/timedelta_ops.py +++ b/bigframes/operations/timedelta_ops.py @@ -26,7 +26,11 @@ class ToTimedeltaOp(base_ops.UnaryOp): unit: typing.Literal["us", "ms", "s", "m", "h", "d", "W"] def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - if input_types[0] in (dtypes.INT_DTYPE, dtypes.FLOAT_DTYPE): + if input_types[0] in ( + dtypes.INT_DTYPE, + dtypes.FLOAT_DTYPE, + dtypes.TIMEDELTA_DTYPE, + ): return dtypes.TIMEDELTA_DTYPE raise TypeError("expected integer or float input") @@ -56,7 +60,6 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT timestamp_add_op = TimestampAdd() -@dataclasses.dataclass(frozen=True) class TimestampSub(base_ops.BinaryOp): name: typing.ClassVar[str] = "timestamp_sub" diff --git a/bigframes/operations/type.py b/bigframes/operations/type.py index 86bb56fc39..0a47cd91f0 100644 --- a/bigframes/operations/type.py +++ b/bigframes/operations/type.py @@ -224,6 +224,10 @@ def output_type( # Common type signatures UNARY_NUMERIC = TypePreserving(bigframes.dtypes.is_numeric, description="numeric") +UNARY_NUMERIC_AND_TIMEDELTA = TypePreserving( + lambda x: bigframes.dtypes.is_numeric(x) or x is bigframes.dtypes.TIMEDELTA_DTYPE, + description="numeric_and_timedelta", +) UNARY_REAL_NUMERIC = UnaryRealNumeric() BINARY_NUMERIC = BinaryNumeric() BINARY_REAL_NUMERIC = BinaryRealNumeric() diff --git a/bigframes/series.py b/bigframes/series.py index 54dd6e7f21..fe2d1aae0e 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -964,6 +964,9 @@ def update(self, other: Union[Series, Sequence, Mapping]) -> None: ) self._set_block(result._get_block()) + def __abs__(self) -> Series: + return self.abs() + def abs(self) -> Series: return self._apply_unary_op(ops.abs_op) diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index 9dc889f619..356000b3f6 100644 --- 
a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -21,6 +21,8 @@ import pandas.testing import pytest +from bigframes import dtypes + @pytest.fixture(scope="module") def temporal_dfs(session): @@ -37,15 +39,16 @@ def temporal_dfs(session): pd.Timestamp("2005-03-05 02:00:00", tz="UTC"), ], "timedelta_col_1": [ - pd.Timedelta(3, "s"), + pd.Timedelta(5, "s"), pd.Timedelta(-4, "d"), pd.Timedelta(5, "h"), ], "timedelta_col_2": [ - pd.Timedelta(2, "s"), + pd.Timedelta(3, "s"), pd.Timedelta(-4, "d"), pd.Timedelta(6, "h"), ], + "numeric_col": [1.5, 2, -3], } ) @@ -54,6 +57,100 @@ def temporal_dfs(session): return bigframes_df, pandas_df +def _assert_series_equal(actual: pd.Series, expected: pd.Series): + """Helper function specifically for timedelta testing. Don't use it outside of this module.""" + if actual.dtype == dtypes.FLOAT_DTYPE: + pandas.testing.assert_series_equal( + actual, expected.astype("Float64"), check_index_type=False + ) + elif actual.dtype == dtypes.INT_DTYPE: + pandas.testing.assert_series_equal( + actual, expected.astype("Int64"), check_index_type=False + ) + else: + pandas.testing.assert_series_equal( + actual.astype("timedelta64[ns]"), + expected.dt.floor("us"), # in BF the precision is microsecond + check_index_type=False, + ) + + +@pytest.mark.parametrize( + ("op", "col_1", "col_2"), + [ + (operator.add, "timedelta_col_1", "timedelta_col_2"), + (operator.sub, "timedelta_col_1", "timedelta_col_2"), + (operator.truediv, "timedelta_col_1", "timedelta_col_2"), + (operator.floordiv, "timedelta_col_1", "timedelta_col_2"), + (operator.truediv, "timedelta_col_1", "numeric_col"), + (operator.floordiv, "timedelta_col_1", "numeric_col"), + (operator.mul, "timedelta_col_1", "numeric_col"), + (operator.mul, "numeric_col", "timedelta_col_1"), + ], +) +def test_timedelta_binary_ops_between_series(temporal_dfs, op, col_1, col_2): + bf_df, pd_df = temporal_dfs + + actual_result = op(bf_df[col_1], 
bf_df[col_2]).to_pandas() + + expected_result = op(pd_df[col_1], pd_df[col_2]) + _assert_series_equal(actual_result, expected_result) + + +@pytest.mark.parametrize( + ("op", "col", "literal"), + [ + (operator.add, "timedelta_col_1", pd.Timedelta(2, "s")), + (operator.sub, "timedelta_col_1", pd.Timedelta(2, "s")), + (operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s")), + (operator.floordiv, "timedelta_col_1", pd.Timedelta(2, "s")), + (operator.truediv, "timedelta_col_1", 3), + (operator.floordiv, "timedelta_col_1", 3), + (operator.mul, "timedelta_col_1", 3), + (operator.mul, "numeric_col", pd.Timedelta(1, "s")), + ], +) +def test_timedelta_binary_ops_series_and_literal(temporal_dfs, op, col, literal): + bf_df, pd_df = temporal_dfs + + actual_result = op(bf_df[col], literal).to_pandas() + + expected_result = op(pd_df[col], literal) + _assert_series_equal(actual_result, expected_result) + + +@pytest.mark.parametrize( + ("op", "col", "literal"), + [ + (operator.add, "timedelta_col_1", pd.Timedelta(2, "s")), + (operator.sub, "timedelta_col_1", pd.Timedelta(2, "s")), + (operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s")), + (operator.floordiv, "timedelta_col_1", pd.Timedelta(2, "s")), + (operator.truediv, "numeric_col", pd.Timedelta(2, "s")), + (operator.floordiv, "numeric_col", pd.Timedelta(2, "s")), + (operator.mul, "timedelta_col_1", 3), + (operator.mul, "numeric_col", pd.Timedelta(1, "s")), + ], +) +def test_timedelta_binary_ops_literal_and_series(temporal_dfs, op, col, literal): + bf_df, pd_df = temporal_dfs + + actual_result = op(literal, bf_df[col]).to_pandas() + + expected_result = op(literal, pd_df[col]) + _assert_series_equal(actual_result, expected_result) + + +@pytest.mark.parametrize("op", [operator.pos, operator.neg, operator.abs]) +def test_timedelta_unary_ops(temporal_dfs, op): + bf_df, pd_df = temporal_dfs + + actual_result = op(bf_df["timedelta_col_1"]).to_pandas() + + expected_result = op(pd_df["timedelta_col_1"]) + 
_assert_series_equal(actual_result, expected_result) + + @pytest.mark.parametrize( ("column", "pd_dtype"), [