diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py index a0afa29a15..e5d637e426 100644 --- a/bigframes/core/compile/ibis_types.py +++ b/bigframes/core/compile/ibis_types.py @@ -81,6 +81,7 @@ BIGFRAMES_TO_IBIS: Dict[bigframes.dtypes.Dtype, ibis_dtypes.DataType] = { pandas: ibis for ibis, pandas in BIDIRECTIONAL_MAPPINGS } +BIGFRAMES_TO_IBIS.update({bigframes.dtypes.TIMEDETLA_DTYPE: ibis_dtypes.int64}) IBIS_TO_BIGFRAMES: Dict[ibis_dtypes.DataType, bigframes.dtypes.Dtype] = { ibis: pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS } diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 2ab10e025d..b42f983619 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1140,6 +1140,13 @@ def to_timestamp_op_impl(x: ibis_types.Value, op: ops.ToTimestampOp): return x.cast(ibis_dtypes.Timestamp(timezone="UTC")) +@scalar_op_compiler.register_unary_op(ops.ToTimedeltaOp, pass_op=True) +def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp): + return ( + typing.cast(ibis_types.NumericValue, x) * UNIT_TO_US_CONVERSION_FACTORS[op.unit] # type: ignore + ).floor() + + @scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True) def remote_function_op_impl(x: ibis_types.Value, op: ops.RemoteFunctionOp): ibis_node = getattr(op.func, "ibis_node", None) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 863615118a..4db124134a 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -55,6 +55,7 @@ TIME_DTYPE = pd.ArrowDtype(pa.time64("us")) DATETIME_DTYPE = pd.ArrowDtype(pa.timestamp("us")) TIMESTAMP_DTYPE = pd.ArrowDtype(pa.timestamp("us", tz="UTC")) +TIMEDETLA_DTYPE = pd.ArrowDtype(pa.duration("us")) NUMERIC_DTYPE = pd.ArrowDtype(pa.decimal128(38, 9)) BIGNUMERIC_DTYPE = pd.ArrowDtype(pa.decimal256(76, 38)) # No arrow equivalent @@ -632,6 +633,9 @@ def convert_to_schema_field( return 
google.cloud.bigquery.SchemaField( name, "RECORD", fields=inner_fields ) + if bigframes_dtype.pyarrow_dtype == pa.duration("us"): + # Timedeltas are represented as integers in microseconds. + return google.cloud.bigquery.SchemaField(name, "INTEGER") raise ValueError( f"No arrow conversion for {bigframes_dtype}. {constants.FEEDBACK_LINK}" ) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index e55cbc4925..d8b0447686 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -170,6 +170,7 @@ ) from bigframes.operations.struct_ops import StructFieldOp, StructOp from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op +from bigframes.operations.timedelta_ops import ToTimedeltaOp __all__ = [ # Base ops @@ -240,6 +241,8 @@ "minute_op", "second_op", "normalize_op", + # Timedelta ops + "ToTimedeltaOp", # Datetime ops "date_op", "time_op", diff --git a/bigframes/operations/timedelta_ops.py b/bigframes/operations/timedelta_ops.py new file mode 100644 index 0000000000..0bcd6eb08f --- /dev/null +++ b/bigframes/operations/timedelta_ops.py @@ -0,0 +1,31 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import dataclasses +import typing + +from bigframes import dtypes +from bigframes.operations import base_ops + + +@dataclasses.dataclass(frozen=True) +class ToTimedeltaOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "to_timedelta" + unit: typing.Literal["us", "ms", "s", "m", "h", "d", "W"] + + def output_type(self, *input_types): + if input_types[0] is not dtypes.INT_DTYPE: + raise TypeError("expected integer input") + return dtypes.TIMEDETLA_DTYPE diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index c744d3b945..4a5e4d4b3a 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -35,6 +35,7 @@ import bigframes.dataframe import bigframes.enums import bigframes.functions._utils as bff_utils +from bigframes.pandas.core.api import to_timedelta from bigframes.pandas.io.api import ( from_glob_path, read_csv, @@ -313,6 +314,7 @@ def reset_session(): "read_pickle", "remote_function", "to_datetime", + "to_timedelta", "from_glob_path", # pandas dtype attributes "NA", diff --git a/bigframes/pandas/core/__init__.py b/bigframes/pandas/core/__init__.py new file mode 100644 index 0000000000..0a2669d7a2 --- /dev/null +++ b/bigframes/pandas/core/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/bigframes/pandas/core/api.py b/bigframes/pandas/core/api.py new file mode 100644 index 0000000000..0f3161afcc --- /dev/null +++ b/bigframes/pandas/core/api.py @@ -0,0 +1,17 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from bigframes.pandas.core.tools.timedeltas import to_timedelta + +__all__ = ["to_timedelta"] diff --git a/bigframes/pandas/core/tools/__init__.py b/bigframes/pandas/core/tools/__init__.py new file mode 100644 index 0000000000..0a2669d7a2 --- /dev/null +++ b/bigframes/pandas/core/tools/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/bigframes/pandas/core/tools/timedeltas.py b/bigframes/pandas/core/tools/timedeltas.py new file mode 100644 index 0000000000..0cedf425fe --- /dev/null +++ b/bigframes/pandas/core/tools/timedeltas.py @@ -0,0 +1,64 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from bigframes_vendored.pandas.core.tools import ( + timedeltas as vendored_pandas_timedeltas, +) +import pandas as pd + +from bigframes import operations as ops +from bigframes import series + + +def to_timedelta( + arg: typing.Union[series.Series, str, int, float], + unit: typing.Optional[vendored_pandas_timedeltas.UnitChoices] = None, +) -> typing.Union[series.Series, pd.Timedelta]: + if not isinstance(arg, series.Series): + return pd.to_timedelta(arg, unit) + + canonical_unit = "us" if unit is None else _canonicalize_unit(unit) + return arg._apply_unary_op(ops.ToTimedeltaOp(canonical_unit)) + + +to_timedelta.__doc__ = vendored_pandas_timedeltas.to_timedelta.__doc__ + + +def _canonicalize_unit( + unit: vendored_pandas_timedeltas.UnitChoices, +) -> typing.Literal["us", "ms", "s", "m", "h", "d", "W"]: + if unit in {"w", "W"}: + return "W" + + if unit in {"D", "d", "days", "day"}: + return "d" + + if unit in {"hours", "hour", "hr", "h"}: + return "h" + + if unit in {"m", "minute", "min", "minutes"}: + return "m" + + if unit in {"s", "seconds", "sec", "second"}: + return "s" + + if unit in {"ms", "milliseconds", "millisecond", "milli", "millis"}: + 
return "ms" + + if unit in {"us", "microseconds", "microsecond", "µs", "micro", "micros"}: + return "us" + + raise TypeError(f"Unrecognized unit: {unit}") diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index 30ffaa8a7d..e46d073056 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -13,6 +13,7 @@ # limitations under the License. from datetime import datetime +import typing import pandas as pd import pytest @@ -726,3 +727,69 @@ def test_to_datetime_timestamp_inputs(arg, utc, output_in_utc): pd.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) + + +@pytest.mark.parametrize( + "unit", + [ + "W", + "w", + "D", + "d", + "days", + "day", + "hours", + "hour", + "hr", + "h", + "m", + "minute", + "min", + "minutes", + "s", + "seconds", + "sec", + "second", + "ms", + "milliseconds", + "millisecond", + "milli", + "millis", + "us", + "microseconds", + "microsecond", + "µs", + "micro", + "micros", + ], +) +def test_to_timedelta_with_bf_series(session, unit): + bf_series = bpd.Series([1, 2, 3], session=session) + pd_series = pd.Series([1, 2, 3]) + + actual_result = ( + typing.cast(bpd.Series, bpd.to_timedelta(bf_series, unit)) + .to_pandas() + .astype("timedelta64[ns]") + ) + + expected_result = pd.to_timedelta(pd_series, unit) + pd.testing.assert_series_equal( + actual_result, expected_result, check_index_type=False + ) + + +@pytest.mark.parametrize( + "unit", + ["Y", "M", "whatever"], +) +def test_to_timedelta_with_bf_series_invalid_unit(session, unit): + bf_series = bpd.Series([1, 2, 3], session=session) + + with pytest.raises(TypeError): + bpd.to_timedelta(bf_series, unit) + + +@pytest.mark.parametrize("input", [1, 1.2, "1s"]) +def test_to_timedelta_non_bf_series(input): + assert bpd.to_timedelta(input) == pd.to_timedelta(input) diff --git a/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py 
b/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py new file mode 100644 index 0000000000..9442e965fa --- /dev/null +++ b/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py @@ -0,0 +1,99 @@ +# Contains code from https://github.com/pandas-dev/pandas/blob/v2.2.3/pandas/core/tools/timedeltas.py + +import typing + +from bigframes_vendored import constants +import pandas as pd + +from bigframes import series + +UnitChoices = typing.Literal[ + "W", + "w", + "D", + "d", + "days", + "day", + "hours", + "hour", + "hr", + "h", + "m", + "minute", + "min", + "minutes", + "s", + "seconds", + "sec", + "second", + "ms", + "milliseconds", + "millisecond", + "milli", + "millis", + "us", + "microseconds", + "microsecond", + "µs", + "micro", + "micros", +] + + +def to_timedelta( + arg: typing.Union[series.Series, str, int, float], + unit: typing.Optional[UnitChoices] = None, +) -> typing.Union[series.Series, pd.Timedelta]: + """ + Converts a scalar or Series to a timedelta object. + + .. note:: + BigQuery only supports precision up to microseconds (us). Therefore, when working + with timedeltas that have a finer granularity than microseconds, be aware that + the additional precision will not be represented in BigQuery. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Converting a Scalar to timedelta + + >>> scalar = 2 + >>> bpd.to_timedelta(scalar, unit='s') + Timedelta('0 days 00:00:02') + + Converting a Series of integers to a Series of timedeltas + + >>> int_series = bpd.Series([1,2,3]) + >>> bpd.to_timedelta(int_series, unit='s') + 0 0 days 00:00:01 + 1 0 days 00:00:02 + 2 0 days 00:00:03 + dtype: duration[us][pyarrow] + + Args: + arg (int, float, str, Series): + The object to convert to a timedelta + unit (str, default 'us'): + Denotes the unit of the arg for numeric `arg`. Defaults to ``"us"``.
+ + Possible values: + + * 'W' / 'w' + * 'D' / 'd' / 'days' / 'day' + * 'hours' / 'hour' / 'hr' / 'h' + * 'm' / 'minute' / 'min' / 'minutes' + * 's' / 'seconds' / 'sec' / 'second' + * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' + * 'us' / 'microseconds' / 'microsecond' / 'µs' / 'micro' / 'micros' + + Returns: + Union[pandas.Timedelta, bigframes.pandas.Series]: + Return type depends on input + - Series: Series of duration[us][pyarrow] dtype + - scalar: timedelta + + """ + + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)