# Patch summary (bigframes to_datetime: `unit` and `format` handling).
#
# - bigframes/core/compile/scalar_op_compiler.py:
#     * adds "W" (7x the "D" factor) and "d" (same factor as "D") to
#       UNIT_TO_US_CONVERSION_FACTORS.
#     * to_datetime_op_impl: a `format` combined with a UTC Timestamp input
#       now raises NotImplementedError; inputs that are neither string nor
#       timestamp are cast to string and parsed with `format` when one is
#       given, otherwise they go through numeric_to_datatime with the unit
#       defaulting to "ns".
# - bigframes/core/tools/datetimes.py:
#     * to_datetime raises ValueError when both `format` and `unit` are given
#       for Int64/Float64 input, and NotImplementedError when `unit` is given
#       for any other dtype.
# - tests/system/small/test_pandas.py:
#     * adds test_to_datetime_unit_param and test_to_datetime_format_param,
#       which compare against pandas results floored to microseconds.
#
# NOTE(review): line breaks, indentation and blank lines below were restored
# from a whitespace-collapsed copy of this patch; blank-line placement was
# inferred from the hunk line counts -- confirm against the upstream commit
# before applying.
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index a52264be17..b41510d166 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -42,6 +42,8 @@
 
 # Datetime constants
 UNIT_TO_US_CONVERSION_FACTORS = {
+    "W": 7 * 24 * 60 * 60 * 1000 * 1000,
+    "d": 24 * 60 * 60 * 1000 * 1000,
     "D": 24 * 60 * 60 * 1000 * 1000,
     "h": 60 * 60 * 1000 * 1000,
     "m": 60 * 1000 * 1000,
@@ -733,12 +735,19 @@ def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp):
     if x.type() == ibis_dtypes.str:
         x = x.to_timestamp(op.format) if op.format else timestamp(x)
     elif x.type() == ibis_dtypes.Timestamp(timezone="UTC"):
+        if op.format:
+            raise NotImplementedError(
+                f"Format parameter is not supported for Timestamp input types. {constants.FEEDBACK_LINK}"
+            )
         return x
     elif x.type() != ibis_dtypes.timestamp:
-        # The default unit is set to "ns" (nanoseconds) for consistency
-        # with pandas, where "ns" is the default unit for datetime operations.
-        unit = op.unit or "ns"
-        x = numeric_to_datatime(x, unit)
+        if op.format:
+            x = x.cast(ibis_dtypes.str).to_timestamp(op.format)
+        else:
+            # The default unit is set to "ns" (nanoseconds) for consistency
+            # with pandas, where "ns" is the default unit for datetime operations.
+            unit = op.unit or "ns"
+            x = numeric_to_datatime(x, unit)
 
     return x.cast(ibis_dtypes.Timestamp(timezone="UTC" if op.utc else None))
 
diff --git a/bigframes/core/tools/datetimes.py b/bigframes/core/tools/datetimes.py
index 4aaf320c7a..96bf556101 100644
--- a/bigframes/core/tools/datetimes.py
+++ b/bigframes/core/tools/datetimes.py
@@ -73,6 +73,14 @@ def to_datetime(
             f"String and Timestamp requires utc=True. {constants.FEEDBACK_LINK}"
         )
 
+    if format and unit and arg.dtype in ("Int64", "Float64"):  # type: ignore
+        raise ValueError("cannot specify both format and unit")
+
+    if unit and arg.dtype not in ("Int64", "Float64"):  # type: ignore
+        raise NotImplementedError(
+            f"Unit parameter is not supported for non-numerical input types. {constants.FEEDBACK_LINK}"
+        )
+
     return arg._apply_unary_op(  # type: ignore
         ops.ToDatetimeOp(
             utc=utc,
diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py
index ec61329aa5..a080a969c8 100644
--- a/tests/system/small/test_pandas.py
+++ b/tests/system/small/test_pandas.py
@@ -539,3 +539,46 @@ def test_to_datetime_series(scalars_dfs):
     pd.testing.assert_series_equal(
         bf_result, pd_result, check_index_type=False, check_names=False
     )
+
+
+@pytest.mark.parametrize(
+    ("arg", "unit"),
+    [
+        ([1, 2, 3], "W"),
+        ([1, 2, 3], "d"),
+        ([1, 2, 3], "D"),
+        ([1, 2, 3], "h"),
+        ([1, 2, 3], "m"),
+        ([20242330, 25244685, 34324234], "s"),
+        ([20242330000, 25244685000, 34324234000], "ms"),
+        ([20242330000000, 25244685000000, 34324234000000], "us"),
+        ([20242330000000000, 25244685000000000, 34324234000000000], "ns"),
+    ],
+)
+def test_to_datetime_unit_param(arg, unit):
+    bf_result = bpd.to_datetime(arg, unit=unit).to_pandas().astype("datetime64[ns]")
+    pd_result = pd.Series(pd.to_datetime(arg, unit=unit)).dt.floor("us")
+    pd.testing.assert_series_equal(
+        bf_result, pd_result, check_index_type=False, check_names=False
+    )
+
+
+@pytest.mark.parametrize(
+    ("arg", "utc", "format"),
+    [
+        ([20230110, 20230101, 20230101], False, "%Y%m%d"),
+        ([201301.01], False, "%Y%m.%d"),
+        (["2023-01-10", "2023-01-20", "2023-01-01"], True, "%Y-%m-%d"),
+        (["2014-08-15 07:19"], True, "%Y-%m-%d %H:%M"),
+    ],
+)
+def test_to_datetime_format_param(arg, utc, format):
+    bf_result = (
+        bpd.to_datetime(arg, utc=utc, format=format)
+        .to_pandas()
+        .astype("datetime64[ns, UTC]" if utc else "datetime64[ns]")
+    )
+    pd_result = pd.Series(pd.to_datetime(arg, utc=utc, format=format)).dt.floor("us")
+    pd.testing.assert_series_equal(
+        bf_result, pd_result, check_index_type=False, check_names=False
+    )