From c07a2417263ae49a49573b8ccd5356e655800c19 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Wed, 19 Feb 2025 21:44:02 +0000
Subject: [PATCH 1/3] fix: fix a bug where to_timdelta() calls over timedeltas
 changes their values

---
 bigframes/core/compile/scalar_op_compiler.py |  5 ++++
 bigframes/core/rewrite/timedeltas.py         | 19 +++++++++++---
 bigframes/operations/__init__.py             |  2 ++
 bigframes/operations/timedelta_ops.py        | 27 +++++++++++++++++---
 tests/system/small/test_pandas.py            | 15 +++++++++++
 5 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index d5ce6e9e09..e0d6460092 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -1174,6 +1174,11 @@ def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp):
     ).floor()
 
 
+@scalar_op_compiler.register_unary_op(ops.timedelta_floor_op)
+def timedelta_floor_op_impl(x: ibis_types.NumericValue):
+    return x.floor()
+
+
 @scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True)
 def remote_function_op_impl(x: ibis_types.Value, op: ops.RemoteFunctionOp):
     ibis_node = getattr(op.func, "ibis_node", None)
diff --git a/bigframes/core/rewrite/timedeltas.py b/bigframes/core/rewrite/timedeltas.py
index dad474e5a1..bde1a4431c 100644
--- a/bigframes/core/rewrite/timedeltas.py
+++ b/bigframes/core/rewrite/timedeltas.py
@@ -125,6 +125,9 @@ def _rewrite_op_expr(
         # but for timedeltas: int(timedelta) // float => int(timedelta)
         return _rewrite_floordiv_op(inputs[0], inputs[1])
 
+    if isinstance(expr.op, ops.ToTimedeltaOp):
+        return _rewrite_to_timedelta_op(expr.op, inputs[0])
+
     return _TypedExpr.create_op_expr(expr.op, *inputs)
 
 
@@ -154,9 +157,9 @@ def _rewrite_mul_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
     result = _TypedExpr.create_op_expr(ops.mul_op, left, right)
 
     if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
-        return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result)
+        return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result)
     if dtypes.is_numeric(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE:
-        return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result)
+        return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result)
 
     return result
 
@@ -165,7 +168,7 @@ def _rewrite_div_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
     result = _TypedExpr.create_op_expr(ops.div_op, left, right)
 
     if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
-        return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result)
+        return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result)
 
     return result
 
@@ -174,11 +177,19 @@ def _rewrite_floordiv_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
     result = _TypedExpr.create_op_expr(ops.floordiv_op, left, right)
 
     if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype):
-        return _TypedExpr.create_op_expr(ops.ToTimedeltaOp("us"), result)
+        return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result)
 
     return result
 
 
+def _rewrite_to_timedelta_op(op: ops.ToTimedeltaOp, arg: _TypedExpr):
+    if arg.dtype is dtypes.TIMEDELTA_DTYPE:
+        # Do nothing for values that are already timedeltas
+        return arg
+
+    return _TypedExpr.create_op_expr(op, arg)
+
+
 @functools.cache
 def _rewrite_aggregation(
     aggregation: ex.Aggregation, schema: schema.ArraySchema
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index f2bc1ecf85..30e4694113 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -182,6 +182,7 @@
 from bigframes.operations.struct_ops import StructFieldOp, StructOp
 from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op
 from bigframes.operations.timedelta_ops import (
+    timedelta_floor_op,
     timestamp_add_op,
     timestamp_sub_op,
     ToTimedeltaOp,
@@ -257,6 +258,7 @@
     "second_op",
     "normalize_op",
     # Timedelta ops
+    "timedelta_floor_op",
     "timestamp_add_op",
     "timestamp_sub_op",
     "ToTimedeltaOp",
diff --git a/bigframes/operations/timedelta_ops.py b/bigframes/operations/timedelta_ops.py
index 689966e21b..364154f728 100644
--- a/bigframes/operations/timedelta_ops.py
+++ b/bigframes/operations/timedelta_ops.py
@@ -36,7 +36,26 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
 
 
 @dataclasses.dataclass(frozen=True)
-class TimestampAdd(base_ops.BinaryOp):
+class TimedeltaFloorOp(base_ops.UnaryOp):
+    """Floors the numeric value to the nearest integer and use it to represent a timedelta.
+
+    This operator is only meant to be used during expression tree rewrites. Do not use it anywhere else!
+    """
+
+    name: typing.ClassVar[str] = "timedelta_floor"
+
+    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
+        input_type = input_types[0]
+        if dtypes.is_numeric(input_type) or input_type is dtypes.TIMEDELTA_DTYPE:
+            return dtypes.TIMEDELTA_DTYPE
+        raise TypeError(f"unsupported type: {input_type}")
+
+
+timedelta_floor_op = TimedeltaFloorOp()
+
+
+@dataclasses.dataclass(frozen=True)
+class TimestampAddOp(base_ops.BinaryOp):
     name: typing.ClassVar[str] = "timestamp_add"
 
     def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
@@ -57,10 +76,10 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
         )
 
 
-timestamp_add_op = TimestampAdd()
+timestamp_add_op = TimestampAddOp()
 
 
-class TimestampSub(base_ops.BinaryOp):
+class TimestampSubOp(base_ops.BinaryOp):
     name: typing.ClassVar[str] = "timestamp_sub"
 
     def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
@@ -76,4 +95,4 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
         )
 
 
-timestamp_sub_op = TimestampSub()
+timestamp_sub_op = TimestampSubOp()
diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py
index 4b4264e33c..da78432cdb 100644
--- a/tests/system/small/test_pandas.py
+++ b/tests/system/small/test_pandas.py
@@ -829,3 +829,18 @@ def test_to_timedelta_with_bf_series_invalid_unit(session, unit):
 @pytest.mark.parametrize("input", [1, 1.2, "1s"])
 def test_to_timedelta_non_bf_series(input):
     assert bpd.to_timedelta(input) == pd.to_timedelta(input)
+
+
+def test_to_timedelta_on_timedelta_series__should_be_no_op(scalars_dfs):
+    bf_df, pd_df = scalars_dfs
+    bf_series = bpd.to_timedelta(bf_df["int64_too"], unit="us")
+    pd_series = pd.to_timedelta(pd_df["int64_too"], unit="us")
+
+    actual_result = (
+        bpd.to_timedelta(bf_series, unit="s").to_pandas().astype("timedelta64[ns]")
+    )
+
+    expected_result = pd.to_timedelta(pd_series, unit="s")
+    pd.testing.assert_series_equal(
+        actual_result, expected_result, check_index_type=False
+    )

From 29f592d2fd8011160892d7dc8baf231eb9fb2e03 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Thu, 20 Feb 2025 17:59:17 +0000
Subject: [PATCH 2/3] add tests for floats too

---
 tests/system/small/test_pandas.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py
index da78432cdb..2f0175f852 100644
--- a/tests/system/small/test_pandas.py
+++ b/tests/system/small/test_pandas.py
@@ -831,10 +831,11 @@ def test_to_timedelta_non_bf_series(input):
     assert bpd.to_timedelta(input) == pd.to_timedelta(input)
 
 
-def test_to_timedelta_on_timedelta_series__should_be_no_op(scalars_dfs):
+@pytest.mark.parametrize("column", ["int64_too", "float64_col"])
+def test_to_timedelta_on_timedelta_series__should_be_no_op(scalars_dfs, column):
     bf_df, pd_df = scalars_dfs
-    bf_series = bpd.to_timedelta(bf_df["int64_too"], unit="us")
-    pd_series = pd.to_timedelta(pd_df["int64_too"], unit="us")
+    bf_series = bpd.to_timedelta(bf_df[column], unit="us")
+    pd_series = pd.to_timedelta(pd_df[column], unit="us").dt.floor("us")
 
     actual_result = (
         bpd.to_timedelta(bf_series, unit="s").to_pandas().astype("timedelta64[ns]")

From d36a65e2b4d4a642140c7bd283edf7ba1fe382f7 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Thu, 20 Feb 2025 19:05:21 +0000
Subject: [PATCH 3/3] remove float test case because py 3.9 env does not
 support NA very well

---
 tests/system/small/test_pandas.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py
index 2f0175f852..da78432cdb 100644
--- a/tests/system/small/test_pandas.py
+++ b/tests/system/small/test_pandas.py
@@ -831,11 +831,10 @@ def test_to_timedelta_non_bf_series(input):
     assert bpd.to_timedelta(input) == pd.to_timedelta(input)
 
 
-@pytest.mark.parametrize("column", ["int64_too", "float64_col"])
-def test_to_timedelta_on_timedelta_series__should_be_no_op(scalars_dfs, column):
+def test_to_timedelta_on_timedelta_series__should_be_no_op(scalars_dfs):
     bf_df, pd_df = scalars_dfs
-    bf_series = bpd.to_timedelta(bf_df[column], unit="us")
-    pd_series = pd.to_timedelta(pd_df[column], unit="us").dt.floor("us")
+    bf_series = bpd.to_timedelta(bf_df["int64_too"], unit="us")
+    pd_series = pd.to_timedelta(pd_df["int64_too"], unit="us")
 
     actual_result = (
         bpd.to_timedelta(bf_series, unit="s").to_pandas().astype("timedelta64[ns]")