From 5e9cf5bcf76dcb671e8e2a676e5e2b845f346e3c Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 4 Jan 2024 01:34:19 +0000 Subject: [PATCH 1/2] feat: add Series dt.tz and dt.unit properties --- bigframes/operations/datetimes.py | 37 ++++++-- .../system/small/operations/test_datetimes.py | 89 ++++++++++++------- .../pandas/core/indexes/accessor.py | 19 ++++ 3 files changed, 107 insertions(+), 38 deletions(-) diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py index a8a33beb57..3165e6f003 100644 --- a/bigframes/operations/datetimes.py +++ b/bigframes/operations/datetimes.py @@ -14,6 +14,9 @@ from __future__ import annotations +import datetime as dt +from typing import Optional + from bigframes.core import log_adapter import bigframes.operations as ops import bigframes.operations.base @@ -27,6 +30,7 @@ class DatetimeMethods( ): __doc__ = vendordt.DatetimeProperties.__doc__ + # Date accessors @property def day(self) -> series.Series: return self._apply_unary_op(ops.day_op) @@ -40,17 +44,26 @@ def date(self) -> series.Series: return self._apply_unary_op(ops.date_op) @property - def hour(self) -> series.Series: - return self._apply_unary_op(ops.hour_op) + def quarter(self) -> series.Series: + return self._apply_unary_op(ops.quarter_op) @property - def minute(self) -> series.Series: - return self._apply_unary_op(ops.minute_op) + def year(self) -> series.Series: + return self._apply_unary_op(ops.year_op) @property def month(self) -> series.Series: return self._apply_unary_op(ops.month_op) + # Time accessors + @property + def hour(self) -> series.Series: + return self._apply_unary_op(ops.hour_op) + + @property + def minute(self) -> series.Series: + return self._apply_unary_op(ops.minute_op) + @property def second(self) -> series.Series: return self._apply_unary_op(ops.second_op) @@ -60,9 +73,17 @@ def time(self) -> series.Series: return self._apply_unary_op(ops.time_op) @property - def quarter(self) -> series.Series: - return self._apply_unary_op(ops.quarter_op) + def tz(self) -> Optional[dt.timezone]: + # Assumption: pyarrow dtype + tz_string = self._dtype.pyarrow_dtype.tz + if tz_string == "UTC": + return dt.timezone.utc + elif tz_string is None: + return None + else: + raise ValueError(f"Unexpected timezone {tz_string}") @property - def year(self) -> series.Series: - return self._apply_unary_op(ops.year_op) + def unit(self) -> str: + # Assumption: pyarrow dtype + return self._dtype.pyarrow_dtype.unit diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 177194c7a8..de31378e42 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -16,16 +16,22 @@ import pytest import bigframes.series -from tests.system.utils import assert_series_equal +from tests.system.utils import assert_series_equal, skip_legacy_pandas DATETIME_COL_NAMES = [("datetime_col",), ("timestamp_col",)] +DATE_COLUMNS = [ + ("datetime_col",), + ("timestamp_col",), + ("date_col",), +] @pytest.mark.parametrize( ("col_name",), - DATETIME_COL_NAMES, + DATE_COLUMNS, ) -def test_day(scalars_dfs, col_name): +@skip_legacy_pandas +def test_dt_day(scalars_dfs, col_name): if pd.__version__.startswith("1."): pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") scalars_df, scalars_pandas_df = scalars_dfs @@ -43,7 +49,8 @@ def test_day(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -def test_date(scalars_dfs, col_name): +@skip_legacy_pandas +def test_dt_date(scalars_dfs, col_name): if pd.__version__.startswith("1."): pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") scalars_df, scalars_pandas_df = scalars_dfs @@ -59,11 +66,10 @@ def test_date(scalars_dfs, col_name): @pytest.mark.parametrize( ("col_name",), - DATETIME_COL_NAMES, + DATE_COLUMNS, ) -def test_dayofweek(scalars_dfs, col_name): - if pd.__version__.startswith("1."): - pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") +@skip_legacy_pandas +def test_dt_dayofweek(scalars_dfs, col_name): scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.dayofweek.to_pandas() @@ -76,9 +82,8 @@ def test_dayofweek(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -def test_hour(scalars_dfs, col_name): - if pd.__version__.startswith("1."): - pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") +@skip_legacy_pandas +def test_dt_hour(scalars_dfs, col_name): scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.hour.to_pandas() @@ -94,9 +99,8 @@ def test_hour(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -def test_minute(scalars_dfs, col_name): - if pd.__version__.startswith("1."): - pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") +@skip_legacy_pandas +def test_dt_minute(scalars_dfs, col_name): scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.minute.to_pandas() @@ -110,11 +114,10 @@ def test_minute(scalars_dfs, col_name): @pytest.mark.parametrize( ("col_name",), - DATETIME_COL_NAMES, + DATE_COLUMNS, ) -def test_month(scalars_dfs, col_name): - if pd.__version__.startswith("1."): - pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") +@skip_legacy_pandas +def test_dt_month(scalars_dfs, col_name): scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.month.to_pandas() @@ -128,9 +131,10 @@ def test_month(scalars_dfs, col_name): @pytest.mark.parametrize( ("col_name",), - DATETIME_COL_NAMES, + DATE_COLUMNS, ) -def test_quarter(scalars_dfs, col_name): +@skip_legacy_pandas +def test_dt_quarter(scalars_dfs, col_name): if pd.__version__.startswith("1."): pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") scalars_df, scalars_pandas_df = scalars_dfs @@ -148,9 +152,8 @@ def test_quarter(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -def test_second(scalars_dfs, col_name): - if pd.__version__.startswith("1."): - pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") +@skip_legacy_pandas +def test_dt_second(scalars_dfs, col_name): scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.second.to_pandas() @@ -166,9 +169,8 @@ def test_second(scalars_dfs, col_name): ("col_name",), DATETIME_COL_NAMES, ) -def test_time(scalars_dfs, col_name): - if pd.__version__.startswith("1."): - pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") +@skip_legacy_pandas +def test_dt_time(scalars_dfs, col_name): scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.time.to_pandas() @@ -182,11 +184,10 @@ def test_time(scalars_dfs, col_name): @pytest.mark.parametrize( ("col_name",), - DATETIME_COL_NAMES, + DATE_COLUMNS, ) -def test_year(scalars_dfs, col_name): - if pd.__version__.startswith("1."): - pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") +@skip_legacy_pandas +def test_dt_year(scalars_dfs, col_name): scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.year.to_pandas() @@ -196,3 +197,31 @@ def test_year(scalars_dfs, col_name): pd_result.astype(pd.Int64Dtype()), bf_result, ) + + +@pytest.mark.parametrize( + ("col_name",), + DATETIME_COL_NAMES, +) +@skip_legacy_pandas +def test_dt_tz(scalars_dfs, col_name): + scalars_df, scalars_pandas_df = scalars_dfs + bf_series: bigframes.series.Series = scalars_df[col_name] + bf_result = bf_series.dt.tz + pd_result = scalars_pandas_df[col_name].dt.tz + + assert bf_result == pd_result + + +@pytest.mark.parametrize( + ("col_name",), + DATETIME_COL_NAMES, +) +@skip_legacy_pandas +def test_dt_unit(scalars_dfs, col_name): + scalars_df, scalars_pandas_df = scalars_dfs + bf_series: bigframes.series.Series = scalars_df[col_name] + bf_result = bf_series.dt.unit + pd_result = scalars_pandas_df[col_name].dt.unit + + assert bf_result == pd_result diff --git a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py index 2b4a326317..9490f4608b 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py @@ -94,3 +94,22 @@ def year(self): """The year of the datetime.""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def tz(self): + """Return the timezone. + + Returns: + datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None + """ + + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def unit(self) -> str: + """Returns the unit of time precision. + + Returns: + Unit as string (eg. "us"). + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From cca5453bfbdee8cb05de06f1842f8e922c27d4fd Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Fri, 5 Jan 2024 00:36:54 +0000 Subject: [PATCH 2/2] remove redundant pandas version checks from tests --- tests/system/small/operations/test_datetimes.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index de31378e42..3882491ecb 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -32,8 +32,6 @@ ) @skip_legacy_pandas def test_dt_day(scalars_dfs, col_name): - if pd.__version__.startswith("1."): - pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.day.to_pandas() @@ -51,8 +49,6 @@ def test_dt_day(scalars_dfs, col_name): ) @skip_legacy_pandas def test_dt_date(scalars_dfs, col_name): - if pd.__version__.startswith("1."): - pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.date.to_pandas() @@ -135,8 +131,6 @@ def test_dt_month(scalars_dfs, col_name): ) @skip_legacy_pandas def test_dt_quarter(scalars_dfs, col_name): - if pd.__version__.startswith("1."): - pytest.skip("Pyarrow datetime objects not support in pandas 1.x.") scalars_df, scalars_pandas_df = scalars_dfs bf_series: bigframes.series.Series = scalars_df[col_name] bf_result = bf_series.dt.quarter.to_pandas()