From df3b288a7590425e613917c4a3b37dd53e84576a Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 23 Jan 2025 18:53:15 +0000 Subject: [PATCH 1/8] define timedelta type and to_timedelta function --- bigframes/core/compile/ibis_types.py | 1 + bigframes/core/compile/scalar_op_compiler.py | 5 + bigframes/dtypes.py | 8 ++ bigframes/operations/__init__.py | 3 + bigframes/operations/timedelta_ops.py | 31 ++++++ bigframes/pandas/__init__.py | 2 + bigframes/pandas/core/__init__.py | 13 +++ bigframes/pandas/core/api.py | 17 ++++ bigframes/pandas/core/tools/__init__.py | 13 +++ bigframes/pandas/core/tools/timedeltas.py | 64 ++++++++++++ notebooks/test.ipynb | 88 +++++++++++++++++ tests/system/small/test_pandas.py | 62 ++++++++++++ .../pandas/core/tools/timedeltas.py | 99 +++++++++++++++++++ 13 files changed, 406 insertions(+) create mode 100644 bigframes/operations/timedelta_ops.py create mode 100644 bigframes/pandas/core/__init__.py create mode 100644 bigframes/pandas/core/api.py create mode 100644 bigframes/pandas/core/tools/__init__.py create mode 100644 bigframes/pandas/core/tools/timedeltas.py create mode 100644 notebooks/test.ipynb create mode 100644 third_party/bigframes_vendored/pandas/core/tools/timedeltas.py diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py index 18f0834903..fd3007f8bd 100644 --- a/bigframes/core/compile/ibis_types.py +++ b/bigframes/core/compile/ibis_types.py @@ -82,6 +82,7 @@ BIGFRAMES_TO_IBIS: Dict[bigframes.dtypes.Dtype, ibis_dtypes.DataType] = { pandas: ibis for ibis, pandas in BIDIRECTIONAL_MAPPINGS } +BIGFRAMES_TO_IBIS.update({bigframes.dtypes.TIMEDETLA_DTYPE: ibis_dtypes.int64}) IBIS_TO_BIGFRAMES: Dict[ibis_dtypes.DataType, bigframes.dtypes.Dtype] = { ibis: pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS } diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 4f670b51ca..a6aa1aa533 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ 
b/bigframes/core/compile/scalar_op_compiler.py @@ -1140,6 +1140,11 @@ def to_timestamp_op_impl(x: ibis_types.Value, op: ops.ToTimestampOp): return x.cast(ibis_dtypes.Timestamp(timezone="UTC")) +@scalar_op_compiler.register_unary_op(ops.ToTimedeltaOp, pass_op=True) +def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp): + return x * UNIT_TO_US_CONVERSION_FACTORS[op.unit] + + @scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True) def remote_function_op_impl(x: ibis_types.Value, op: ops.RemoteFunctionOp): ibis_node = getattr(op.func, "ibis_node", None) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 3da3fa24f3..8170726ef2 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -54,6 +54,7 @@ TIME_DTYPE = pd.ArrowDtype(pa.time64("us")) DATETIME_DTYPE = pd.ArrowDtype(pa.timestamp("us")) TIMESTAMP_DTYPE = pd.ArrowDtype(pa.timestamp("us", tz="UTC")) +TIMEDETLA_DTYPE = pd.ArrowDtype(pa.duration("us")) NUMERIC_DTYPE = pd.ArrowDtype(pa.decimal128(38, 9)) BIGNUMERIC_DTYPE = pd.ArrowDtype(pa.decimal256(76, 38)) # No arrow equivalent @@ -194,6 +195,13 @@ class SimpleDtypeInfo: orderable=True, clusterable=True, ), + SimpleDtypeInfo( + dtype=TIMEDETLA_DTYPE, + arrow_dtype=pa.duration("us"), + type_kind=("INTEGER",), + orderable=True, + clusterable=True, + ), SimpleDtypeInfo( dtype=BYTES_DTYPE, arrow_dtype=pa.binary(), type_kind=("BYTES",), orderable=True ), diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index e55cbc4925..d8b0447686 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -170,6 +170,7 @@ ) from bigframes.operations.struct_ops import StructFieldOp, StructOp from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op +from bigframes.operations.timedelta_ops import ToTimedeltaOp __all__ = [ # Base ops @@ -240,6 +241,8 @@ "minute_op", "second_op", "normalize_op", + # Timedelta ops + "ToTimedeltaOp", # Datetime ops "date_op", 
"time_op", diff --git a/bigframes/operations/timedelta_ops.py b/bigframes/operations/timedelta_ops.py new file mode 100644 index 0000000000..0bcd6eb08f --- /dev/null +++ b/bigframes/operations/timedelta_ops.py @@ -0,0 +1,31 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import dataclasses +import typing + +from bigframes import dtypes +from bigframes.operations import base_ops + + +@dataclasses.dataclass(frozen=True) +class ToTimedeltaOp(base_ops.UnaryOp): + name: typing.ClassVar[str] = "to_timedelta" + unit: typing.Literal["us", "ms", "s", "m", "h", "d", "W"] + + def output_type(self, *input_types): + if input_types[0] is not dtypes.INT_DTYPE: + raise TypeError("expected integer input") + return dtypes.TIMEDETLA_DTYPE diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 395b573916..db0ae2e868 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -35,6 +35,7 @@ import bigframes.dataframe import bigframes.enums import bigframes.functions._utils as functions_utils +from bigframes.pandas.core.api import to_timedelta from bigframes.pandas.io.api import ( from_glob_path, read_csv, @@ -313,6 +314,7 @@ def reset_session(): "read_pickle", "remote_function", "to_datetime", + "to_timedelta", "from_glob_path", # pandas dtype attributes "NA", diff --git a/bigframes/pandas/core/__init__.py b/bigframes/pandas/core/__init__.py new file mode 100644 index 0000000000..0a2669d7a2 --- /dev/null +++ 
b/bigframes/pandas/core/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/bigframes/pandas/core/api.py b/bigframes/pandas/core/api.py new file mode 100644 index 0000000000..0f3161afcc --- /dev/null +++ b/bigframes/pandas/core/api.py @@ -0,0 +1,17 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from bigframes.pandas.core.tools.timedeltas import to_timedelta + +__all__ = ["to_timedelta"] diff --git a/bigframes/pandas/core/tools/__init__.py b/bigframes/pandas/core/tools/__init__.py new file mode 100644 index 0000000000..0a2669d7a2 --- /dev/null +++ b/bigframes/pandas/core/tools/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/bigframes/pandas/core/tools/timedeltas.py b/bigframes/pandas/core/tools/timedeltas.py new file mode 100644 index 0000000000..0cedf425fe --- /dev/null +++ b/bigframes/pandas/core/tools/timedeltas.py @@ -0,0 +1,64 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import typing + +from bigframes_vendored.pandas.core.tools import ( + timedeltas as vendored_pandas_timedeltas, +) +import pandas as pd + +from bigframes import operations as ops +from bigframes import series + + +def to_timedelta( + arg: typing.Union[series.Series, str, int, float], + unit: typing.Optional[vendored_pandas_timedeltas.UnitChoices] = None, +) -> typing.Union[series.Series, pd.Timedelta]: + if not isinstance(arg, series.Series): + return pd.to_timedelta(arg, unit) + + canonical_unit = "us" if unit is None else _canonicalize_unit(unit) + return arg._apply_unary_op(ops.ToTimedeltaOp(canonical_unit)) + + +to_timedelta.__doc__ = vendored_pandas_timedeltas.to_timedelta.__doc__ + + +def _canonicalize_unit( + unit: vendored_pandas_timedeltas.UnitChoices, +) -> typing.Literal["us", "ms", "s", "m", "h", "d", "W"]: + if unit in {"w", "W"}: + return "W" + + if unit in {"D", "d", "days", "day"}: + return "d" + + if unit in {"hours", "hour", "hr", "h"}: + return "h" + + if unit in {"m", "minute", "min", "minutes"}: + return "m" + + if unit in {"s", "seconds", "sec", "second"}: + return "s" + + if unit in {"ms", "milliseconds", "millisecond", "milli", "millis"}: + return "ms" + + if unit in {"us", "microseconds", "microsecond", "µs", "micro", "micros"}: + return "us" + + raise TypeError(f"Unrecognized unit: {unit}") diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb new file mode 100644 index 0000000000..852fcf97df --- /dev/null +++ b/notebooks/test.ipynb @@ -0,0 +1,88 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import bigframes.pandas as bpd\n", + "bpd.options.display.progress_bar = None" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('0 days 00:00:02')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scalar = 2\n", + 
"bpd.to_timedelta(scalar, unit='s')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/global_session.py:114: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", + " return func(get_global_session(), *args, **kwargs)\n" + ] + }, + { + "data": { + "text/plain": [ + "0 0 days 00:00:01\n", + "1 0 days 00:00:02\n", + "2 0 days 00:00:03\n", + "dtype: duration[us][pyarrow]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "int_series = bpd.Series([1,2,3])\n", + "bpd.to_timedelta(int_series, unit='s')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index 30ffaa8a7d..40d5c2e1ed 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -726,3 +726,65 @@ def test_to_datetime_timestamp_inputs(arg, utc, output_in_utc): pd.testing.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) + + +@pytest.mark.parametrize( + "unit", + [ + "W", + "w", + "D", + "d", + "days", + "day", + "hours", + "hour", + "hr", + "h", + "m", + "minute", + "min", + "minutes", + "s", + "seconds", + "sec", + "second", + "ms", + "milliseconds", + "millisecond", + "milli", + "millis", + "us", + "microseconds", + "microsecond", + "µs", + "micro", + "micros", + ], +) +def test_to_timedelta_with_bf_series(session, 
unit): + bf_series = bpd.Series([1, 2, 3], session=session) + pd_series = pd.Series([1, 2, 3]) + + actual_result = bpd.to_timedelta(bf_series, unit).to_pandas() + + expected_result = pd.to_timedelta(pd_series, unit).astype("duration[us][pyarrow]") + pd.testing.assert_series_equal( + actual_result, expected_result, check_index_type=False + ) + + +@pytest.mark.parametrize( + "unit", + ["Y", "M", "whatever"], +) +def test_to_timedelta_with_bf_series_invalid_unit(session, unit): + bf_series = bpd.Series([1, 2, 3], session=session) + + with pytest.raises(TypeError): + bpd.to_timedelta(bf_series, unit) + + +@pytest.mark.parametrize("input", [1, 1.2, "1s"]) +def test_to_timedelta_non_bf_series(input): + assert bpd.to_timedelta(input) == pd.to_timedelta(input) diff --git a/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py b/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py new file mode 100644 index 0000000000..49c0cafc46 --- /dev/null +++ b/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py @@ -0,0 +1,99 @@ +# Contains code from https://github.com/pandas-dev/pandas/blob/v2.2.3/pandas/core/tools/timedeltas.py + +import typing + +from bigframes_vendored import constants +import pandas as pd + +from bigframes import series + +UnitChoices: typing.TypeAlias = typing.Literal[ + "W", + "w", + "D", + "d", + "days", + "day", + "hours", + "hour", + "hr", + "h", + "m", + "minute", + "min", + "minutes", + "s", + "seconds", + "sec", + "second", + "ms", + "milliseconds", + "millisecond", + "milli", + "millis", + "us", + "microseconds", + "microsecond", + "µs", + "micro", + "micros", +] + + +def to_timedelta( + arg: typing.Union[series.Series, str, int, float], + unit: typing.Optional[UnitChoices] = None, +) -> typing.Union[series.Series, pd.Timedelta]: + """ + Converts a scalar or Series to a timedelta object. + + .. note:: + BigQuery only supports precision up to microseconds (us). 
Therefore, when working + with timedeltas that have a finer granularity than microseconds, be aware that + the additional precision will not be represented in BigQuery. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Converting a Scalar to timedelta + + >>> scalar = 2 + >>> bpd.to_timedelta(scalar, unit='s') + Timedelta('0 days 00:00:02') + + Converting a Series of integers to a Series of timedeltas + + >>> int_series = bpd.Series([1,2,3]) + >>> bpd.to_timedelta(int_series, unit='s') + 0 0 days 00:00:01 + 1 0 days 00:00:02 + 2 0 days 00:00:03 + dtype: duration[us][pyarrow] + + Args: + arg (int, float, str, Series): + The object to convert to a timedelta + unit (str, default 'us'): + Denotes the unit of the arg for numeric `arg`. Defaults to ``"us"``. + + Possible values: + + * 'W' / 'w' + * 'D' / 'd' / 'days' / 'day' + * 'hours' / 'hour' / 'hr' / 'h' + * 'm' / 'minute' / 'min' / 'minutes' + * 's' / 'seconds' / 'sec' / 'second' + * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' + * 'us' / 'microseconds' / 'microsecond' / 'µs' / 'micro' / 'micros' + + Returns: + Union[pandas.Timedelta, bigframes.pandas.Series]: + Return type depends on input + - Series: Series of duration[us][pyarrow] dtype + - scalar: timedelta + + """ + + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 00d10bf4b2a5f3b9e9027d135b9e691d9530aed1 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 23 Jan 2025 18:54:41 +0000 Subject: [PATCH 2/8] remove unnecessary file --- notebooks/test.ipynb | 88 -------------------------------------------- 1 file changed, 88 deletions(-) delete mode 100644 notebooks/test.ipynb diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb deleted file mode 100644 index 852fcf97df..0000000000 --- a/notebooks/test.ipynb +++ /dev/null @@ -1,88 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import bigframes.pandas
as bpd\n", - "bpd.options.display.progress_bar = None" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Timedelta('0 days 00:00:02')" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "scalar = 2\n", - "bpd.to_timedelta(scalar, unit='s')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/global_session.py:114: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", - " return func(get_global_session(), *args, **kwargs)\n" - ] - }, - { - "data": { - "text/plain": [ - "0 0 days 00:00:01\n", - "1 0 days 00:00:02\n", - "2 0 days 00:00:03\n", - "dtype: duration[us][pyarrow]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "int_series = bpd.Series([1,2,3])\n", - "bpd.to_timedelta(int_series, unit='s')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From b1e9672ec0aee0cc5ba1d25bd8ce975657f14dc5 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 23 Jan 2025 18:58:45 +0000 Subject: [PATCH 3/8] remove TypeAlias type for 3.9 compatibility --- third_party/bigframes_vendored/pandas/core/tools/timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py 
b/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py index 49c0cafc46..9442e965fa 100644 --- a/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py +++ b/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py @@ -7,7 +7,7 @@ from bigframes import series -UnitChoices: typing.TypeAlias = typing.Literal[ +UnitChoices = typing.Literal[ "W", "w", "D", From dd251a65f967b4d7da13a056faad4667974e2d98 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 23 Jan 2025 19:28:04 +0000 Subject: [PATCH 4/8] fix mypy --- bigframes/core/compile/scalar_op_compiler.py | 4 +++- tests/system/small/test_pandas.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index a6aa1aa533..a7e7346169 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1142,7 +1142,9 @@ def to_timestamp_op_impl(x: ibis_types.Value, op: ops.ToTimestampOp): @scalar_op_compiler.register_unary_op(ops.ToTimedeltaOp, pass_op=True) def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp): - return x * UNIT_TO_US_CONVERSION_FACTORS[op.unit] + return ( + typing.cast(ibis_types.NumericValue, x) * UNIT_TO_US_CONVERSION_FACTORS[op.unit] # type: ignore + ).floor() @scalar_op_compiler.register_unary_op(ops.RemoteFunctionOp, pass_op=True) diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index 40d5c2e1ed..9319a17024 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -13,6 +13,7 @@ # limitations under the License. 
from datetime import datetime +import typing import pandas as pd import pytest @@ -766,7 +767,9 @@ def test_to_timedelta_with_bf_series(session, unit): bf_series = bpd.Series([1, 2, 3], session=session) pd_series = pd.Series([1, 2, 3]) - actual_result = bpd.to_timedelta(bf_series, unit).to_pandas() + actual_result = typing.cast( + bpd.Series, bpd.to_timedelta(bf_series, unit) + ).to_pandas() expected_result = pd.to_timedelta(pd_series, unit).astype("duration[us][pyarrow]") pd.testing.assert_series_equal( From 348b85ee337755335164b62c7a79e1d6ee3d661f Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 23 Jan 2025 19:30:19 +0000 Subject: [PATCH 5/8] fix lint --- bigframes/core/compile/scalar_op_compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index a7e7346169..311a2cc0cc 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1143,7 +1143,7 @@ def to_timestamp_op_impl(x: ibis_types.Value, op: ops.ToTimestampOp): @scalar_op_compiler.register_unary_op(ops.ToTimedeltaOp, pass_op=True) def to_timedelta_op_impl(x: ibis_types.Value, op: ops.ToTimedeltaOp): return ( - typing.cast(ibis_types.NumericValue, x) * UNIT_TO_US_CONVERSION_FACTORS[op.unit] # type: ignore + typing.cast(ibis_types.NumericValue, x) * UNIT_TO_US_CONVERSION_FACTORS[op.unit] # type: ignore ).floor() From b477d4b1e765d313f96de82e7acef120d5eed5d2 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 23 Jan 2025 22:10:25 +0000 Subject: [PATCH 6/8] move timedelta out of the simple dtype list --- bigframes/dtypes.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 8170726ef2..38a0432339 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -195,13 +195,6 @@ class SimpleDtypeInfo: orderable=True, clusterable=True, ), - SimpleDtypeInfo( - 
dtype=TIMEDETLA_DTYPE, - arrow_dtype=pa.duration("us"), - type_kind=("INTEGER",), - orderable=True, - clusterable=True, - ), SimpleDtypeInfo( dtype=BYTES_DTYPE, arrow_dtype=pa.binary(), type_kind=("BYTES",), orderable=True ), @@ -640,6 +633,9 @@ def convert_to_schema_field( return google.cloud.bigquery.SchemaField( name, "RECORD", fields=inner_fields ) + if bigframes_dtype.pyarrow_dtype == pa.duration('us'): + # Timedeltas are represented as integers in microseconds. + return google.cloud.bigquery.SchemaField(name, "INTEGER") raise ValueError( f"No arrow conversion for {bigframes_dtype}. {constants.FEEDBACK_LINK}" ) From e65d5e46eb3570deaed38cd235e3bc0607c9a497 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 23 Jan 2025 22:12:52 +0000 Subject: [PATCH 7/8] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 38a0432339..a64c0174d6 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -633,7 +633,7 @@ def convert_to_schema_field( return google.cloud.bigquery.SchemaField( name, "RECORD", fields=inner_fields ) - if bigframes_dtype.pyarrow_dtype == pa.duration('us'): + if bigframes_dtype.pyarrow_dtype == pa.duration("us"): # Timedeltas are represented as integers in microseconds. 
return google.cloud.bigquery.SchemaField(name, "INTEGER") raise ValueError( From d7fb0d88e8dab681d521ab6a5741c94c211bb112 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 24 Jan 2025 01:02:03 +0000 Subject: [PATCH 8/8] fix type casts in tests --- tests/system/small/test_pandas.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index 9319a17024..e46d073056 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -767,11 +767,13 @@ def test_to_timedelta_with_bf_series(session, unit): bf_series = bpd.Series([1, 2, 3], session=session) pd_series = pd.Series([1, 2, 3]) - actual_result = typing.cast( - bpd.Series, bpd.to_timedelta(bf_series, unit) - ).to_pandas() + actual_result = ( + typing.cast(bpd.Series, bpd.to_timedelta(bf_series, unit)) + .to_pandas() + .astype("timedelta64[ns]") + ) - expected_result = pd.to_timedelta(pd_series, unit).astype("duration[us][pyarrow]") + expected_result = pd.to_timedelta(pd_series, unit) pd.testing.assert_series_equal( actual_result, expected_result, check_index_type=False )