Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

chore: define timedelta type and to_timedelta function #1317

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jan 27, 2025
1 change: 1 addition & 0 deletions 1 bigframes/core/compile/ibis_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
BIGFRAMES_TO_IBIS: Dict[bigframes.dtypes.Dtype, ibis_dtypes.DataType] = {
    pandas: ibis for ibis, pandas in BIDIRECTIONAL_MAPPINGS
}
# The timedelta dtype is mapped in the BigFrames -> Ibis direction only (to
# int64). IBIS_TO_BIGFRAMES below is built solely from BIDIRECTIONAL_MAPPINGS,
# so this entry adds no reverse mapping.
BIGFRAMES_TO_IBIS.update({bigframes.dtypes.TIMEDETLA_DTYPE: ibis_dtypes.int64})
IBIS_TO_BIGFRAMES: Dict[ibis_dtypes.DataType, bigframes.dtypes.Dtype] = {
    ibis: pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS
}
Expand Down
7 changes: 7 additions & 0 deletions 7 bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,13 @@ def to_timestamp_op_impl(x: ibis_types.Value, op: ops.ToTimestampOp):
return x.cast(ibis_dtypes.Timestamp(timezone="UTC"))


@scalar_op_compiler.register_unary_op(ops.ToTimedeltaOp, pass_op=True)
def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp):
    """Compile ``ToTimedeltaOp``: scale ``x`` by the factor for ``op.unit`` and floor it.

    Args:
        x: The input expression; it is treated as a numeric value.
        op: The op instance; only ``op.unit`` is read.

    Returns:
        The floored product of ``x`` and the conversion factor.
    """
    numeric_input = typing.cast(ibis_types.NumericValue, x)
    # NOTE(review): presumably the number of microseconds per unit, judging by
    # the table's name -- confirm against its definition.
    factor = UNIT_TO_US_CONVERSION_FACTORS[op.unit]
    scaled = numeric_input * factor  # type: ignore
    return scaled.floor()


@scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True)
def remote_function_op_impl(x: ibis_types.Value, op: ops.RemoteFunctionOp):
ibis_node = getattr(op.func, "ibis_node", None)
Expand Down
4 changes: 4 additions & 0 deletions 4 bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
TIME_DTYPE = pd.ArrowDtype(pa.time64("us"))
DATETIME_DTYPE = pd.ArrowDtype(pa.timestamp("us"))
TIMESTAMP_DTYPE = pd.ArrowDtype(pa.timestamp("us", tz="UTC"))
# NOTE(review): "TIMEDETLA" looks like a misspelling of "TIMEDELTA". Other
# modules reference the constant with this exact spelling, so a rename has to
# be done as one coordinated change across all of them.
TIMEDETLA_DTYPE = pd.ArrowDtype(pa.duration("us"))
NUMERIC_DTYPE = pd.ArrowDtype(pa.decimal128(38, 9))
BIGNUMERIC_DTYPE = pd.ArrowDtype(pa.decimal256(76, 38))
# No arrow equivalent
Expand Down Expand Up @@ -632,6 +633,9 @@ def convert_to_schema_field(
return google.cloud.bigquery.SchemaField(
name, "RECORD", fields=inner_fields
)
if bigframes_dtype.pyarrow_dtype == pa.duration("us"):
# Timedeltas are represented as integers in microseconds.
return google.cloud.bigquery.SchemaField(name, "INTEGER")
raise ValueError(
f"No arrow conversion for {bigframes_dtype}. {constants.FEEDBACK_LINK}"
)
Expand Down
3 changes: 3 additions & 0 deletions 3 bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@
)
from bigframes.operations.struct_ops import StructFieldOp, StructOp
from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op
from bigframes.operations.timedelta_ops import ToTimedeltaOp

__all__ = [
# Base ops
Expand Down Expand Up @@ -240,6 +241,8 @@
"minute_op",
"second_op",
"normalize_op",
# Timedelta ops
"ToTimedeltaOp",
# Datetime ops
"date_op",
"time_op",
Expand Down
31 changes: 31 additions & 0 deletions 31 bigframes/operations/timedelta_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import dataclasses
import typing

from bigframes import dtypes
from bigframes.operations import base_ops


@dataclasses.dataclass(frozen=True)
class ToTimedeltaOp(base_ops.UnaryOp):
    """Unary op that converts an integer input into the timedelta dtype."""

    name: typing.ClassVar[str] = "to_timedelta"
    # Unit in which the integer input values are expressed.
    unit: typing.Literal["us", "ms", "s", "m", "h", "d", "W"]

    def output_type(self, *input_types):
        """Return the timedelta dtype for an integer input.

        Args:
            *input_types: Dtypes of the op's inputs; only the first is checked.

        Returns:
            ``dtypes.TIMEDETLA_DTYPE``.

        Raises:
            TypeError: If the first input dtype is not the integer dtype.
        """
        # Compare by equality rather than identity: a dtype that is equal to
        # INT_DTYPE but is a distinct object must still be accepted.
        if input_types[0] != dtypes.INT_DTYPE:
            raise TypeError("expected integer input")
        return dtypes.TIMEDETLA_DTYPE
2 changes: 2 additions & 0 deletions 2 bigframes/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import bigframes.dataframe
import bigframes.enums
import bigframes.functions._utils as bff_utils
from bigframes.pandas.core.api import to_timedelta
from bigframes.pandas.io.api import (
from_glob_path,
read_csv,
Expand Down Expand Up @@ -313,6 +314,7 @@ def reset_session():
"read_pickle",
"remote_function",
"to_datetime",
"to_timedelta",
"from_glob_path",
# pandas dtype attributes
"NA",
Expand Down
13 changes: 13 additions & 0 deletions 13 bigframes/pandas/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17 changes: 17 additions & 0 deletions 17 bigframes/pandas/core/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from bigframes.pandas.core.tools.timedeltas import to_timedelta

__all__ = ["to_timedelta"]
13 changes: 13 additions & 0 deletions 13 bigframes/pandas/core/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
64 changes: 64 additions & 0 deletions 64 bigframes/pandas/core/tools/timedeltas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import typing

from bigframes_vendored.pandas.core.tools import (
timedeltas as vendored_pandas_timedeltas,
)
import pandas as pd

from bigframes import operations as ops
from bigframes import series


def to_timedelta(
    arg: typing.Union[series.Series, str, int, float],
    unit: typing.Optional[vendored_pandas_timedeltas.UnitChoices] = None,
) -> typing.Union[series.Series, pd.Timedelta]:
    # The user-facing docstring is attached right after this definition.
    if isinstance(arg, series.Series):
        # A missing unit means the Series values are taken as microseconds.
        canonical_unit = _canonicalize_unit(unit) if unit is not None else "us"
        timedelta_op = ops.ToTimedeltaOp(canonical_unit)
        return arg._apply_unary_op(timedelta_op)

    # Anything that is not a BigFrames Series is handed to pandas unchanged.
    return pd.to_timedelta(arg, unit)


# Reuse the vendored docstring so the public documentation lives in one place.
to_timedelta.__doc__ = vendored_pandas_timedeltas.to_timedelta.__doc__


def _canonicalize_unit(
unit: vendored_pandas_timedeltas.UnitChoices,
) -> typing.Literal["us", "ms", "s", "m", "h", "d", "W"]:
if unit in {"w", "W"}:
return "W"

if unit in {"D", "d", "days", "day"}:
return "d"

if unit in {"hours", "hour", "hr", "h"}:
return "h"

if unit in {"m", "minute", "min", "minutes"}:
return "m"

if unit in {"s", "seconds", "sec", "second"}:
return "s"

if unit in {"ms", "milliseconds", "millisecond", "milli", "millis"}:
return "ms"

if unit in {"us", "microseconds", "microsecond", "µs", "micro", "micros"}:
return "us"

raise TypeError(f"Unrecognized unit: {unit}")
67 changes: 67 additions & 0 deletions 67 tests/system/small/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from datetime import datetime
import typing

import pandas as pd
import pytest
Expand Down Expand Up @@ -726,3 +727,69 @@ def test_to_datetime_timestamp_inputs(arg, utc, output_in_utc):
pd.testing.assert_series_equal(
bf_result, pd_result, check_index_type=False, check_names=False
)


@pytest.mark.parametrize(
    "unit",
    [
        "W",
        "w",
        "D",
        "d",
        "days",
        "day",
        "hours",
        "hour",
        "hr",
        "h",
        "m",
        "minute",
        "min",
        "minutes",
        "s",
        "seconds",
        "sec",
        "second",
        "ms",
        "milliseconds",
        "millisecond",
        "milli",
        "millis",
        "us",
        "microseconds",
        "microsecond",
        "µs",
        "micro",
        "micros",
    ],
)
def test_to_timedelta_with_bf_series(session, unit):
    """Every supported unit spelling must give the same result as pandas."""
    bf_input = bpd.Series([1, 2, 3], session=session)
    pd_input = pd.Series([1, 2, 3])

    bf_timedeltas = typing.cast(bpd.Series, bpd.to_timedelta(bf_input, unit))
    # Cast the downloaded result to pandas' native timedelta dtype so that it
    # can be compared with what pandas itself produces.
    actual_result = bf_timedeltas.to_pandas().astype("timedelta64[ns]")

    expected_result = pd.to_timedelta(pd_input, unit)
    pd.testing.assert_series_equal(
        actual_result,
        expected_result,
        check_index_type=False,
    )


@pytest.mark.parametrize("unit", ["Y", "M", "whatever"])
def test_to_timedelta_with_bf_series_invalid_unit(session, unit):
    """Unsupported unit spellings must be rejected with a TypeError."""
    series_under_test = bpd.Series([1, 2, 3], session=session)

    with pytest.raises(TypeError):
        bpd.to_timedelta(series_under_test, unit)


@pytest.mark.parametrize("input", [1, 1.2, "1s"])
def test_to_timedelta_non_bf_series(input):
    """Scalar inputs must produce exactly what pandas produces."""
    actual = bpd.to_timedelta(input)
    expected = pd.to_timedelta(input)
    assert actual == expected
99 changes: 99 additions & 0 deletions 99 third_party/bigframes_vendored/pandas/core/tools/timedeltas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Contains code from https://github.com/pandas-dev/pandas/blob/v2.2.3/pandas/core/tools/timedeltas.py

import typing

from bigframes_vendored import constants
import pandas as pd

from bigframes import series

# Every spelling accepted for the ``unit`` argument of ``to_timedelta``.
# Several spellings denote the same unit (for example "D", "d", "days", "day").
UnitChoices = typing.Literal[
    "W",
    "w",
    "D",
    "d",
    "days",
    "day",
    "hours",
    "hour",
    "hr",
    "h",
    "m",
    "minute",
    "min",
    "minutes",
    "s",
    "seconds",
    "sec",
    "second",
    "ms",
    "milliseconds",
    "millisecond",
    "milli",
    "millis",
    "us",
    "microseconds",
    "microsecond",
    "µs",
    "micro",
    "micros",
]


def to_timedelta(
    arg: typing.Union[series.Series, str, int, float],
    unit: typing.Optional[UnitChoices] = None,
) -> typing.Union[series.Series, pd.Timedelta]:
    """
    Converts a scalar or Series to a timedelta object.

    .. note::
        BigQuery only supports precision up to microseconds (us). Therefore, when working
        with timedeltas that have a finer granularity than microseconds, be aware that
        the additional precision will not be represented in BigQuery.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

    Converting a Scalar to timedelta

        >>> scalar = 2
        >>> bpd.to_timedelta(scalar, unit='s')
        Timedelta('0 days 00:00:02')

    Converting a Series of integers to a Series of timedeltas

        >>> int_series = bpd.Series([1,2,3])
        >>> bpd.to_timedelta(int_series, unit='s')
        0    0 days 00:00:01
        1    0 days 00:00:02
        2    0 days 00:00:03
        dtype: duration[us][pyarrow]

    Args:
        arg (int, float, str, Series):
            The object to convert to a timedelta.
        unit (str, optional):
            Denotes the unit of the arg for numeric `arg`. For Series input,
            defaults to ``"us"``. Scalar input is passed to
            ``pandas.to_timedelta`` unchanged, so the pandas default applies.

            Possible values:

            * 'W' / 'w'
            * 'D' / 'd' / 'days' / 'day'
            * 'hours' / 'hour' / 'hr' / 'h'
            * 'm' / 'minute' / 'min' / 'minutes'
            * 's' / 'seconds' / 'sec' / 'second'
            * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis'
            * 'us' / 'microseconds' / 'microsecond' / 'µs' / 'micro' / 'micros'

    Returns:
        Union[pandas.Timedelta, bigframes.pandas.Series]:
            Return type depends on input

            - Series: Series of duration[us][pyarrow] dtype
            - scalar: timedelta

    """

    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.