Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit c9ca02c

Browse filesBrowse files
authored
feat: add DataFrame.resample and Series.resample (#2213)
* feat: add DataFrame.resample and Series.resample * raise for unsupported values * add docstrings * fix dataframe tests
1 parent 0a3e172 commit c9ca02c
Copy full SHA for c9ca02c

10 files changed

+288-123Lines changed: 288 additions & 123 deletions

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎bigframes/core/blocks.py‎

Copy file name to clipboardExpand all lines: bigframes/core/blocks.py
+25Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1996,6 +1996,31 @@ def _generate_resample_label(
19961996
Literal["epoch", "start", "start_day", "end", "end_day"],
19971997
] = "start_day",
19981998
) -> Block:
1999+
if not isinstance(rule, str):
2000+
raise NotImplementedError(
2001+
f"Only offset strings are currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
2002+
)
2003+
2004+
if rule in ("ME", "YE", "QE", "BME", "BA", "BQE", "W"):
2005+
raise NotImplementedError(
2006+
f"Offset strings 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', 'W' are not currently supported for rule, but got {repr(rule)}. {constants.FEEDBACK_LINK}"
2007+
)
2008+
2009+
if closed == "right":
2010+
raise NotImplementedError(
2011+
f"Only closed='left' is currently supported. {constants.FEEDBACK_LINK}",
2012+
)
2013+
2014+
if label == "right":
2015+
raise NotImplementedError(
2016+
f"Only label='left' is currently supported. {constants.FEEDBACK_LINK}",
2017+
)
2018+
2019+
if origin not in ("epoch", "start", "start_day"):
2020+
raise NotImplementedError(
2021+
f"Only origin='epoch', 'start', 'start_day' are currently supported, but got {repr(origin)}. {constants.FEEDBACK_LINK}"
2022+
)
2023+
19992024
# Validate and resolve the index or column to use for grouping
20002025
if on is None:
20012026
if len(self.index_columns) == 0:
Collapse file

‎bigframes/dataframe.py‎

Copy file name to clipboardExpand all lines: bigframes/dataframe.py
+5-57Lines changed: 5 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -4182,10 +4182,12 @@ def _split(
41824182
return [DataFrame(block) for block in blocks]
41834183

41844184
@validations.requires_ordering()
4185-
def _resample(
4185+
def resample(
41864186
self,
41874187
rule: str,
41884188
*,
4189+
closed: Optional[Literal["right", "left"]] = None,
4190+
label: Optional[Literal["right", "left"]] = None,
41894191
on: blocks.Label = None,
41904192
level: Optional[LevelsType] = None,
41914193
origin: Union[
@@ -4195,64 +4197,10 @@ def _resample(
41954197
Literal["epoch", "start", "start_day", "end", "end_day"],
41964198
] = "start_day",
41974199
) -> bigframes.core.groupby.DataFrameGroupBy:
4198-
"""Internal function to support resample. Resample time-series data.
4199-
4200-
**Examples:**
4201-
4202-
>>> import bigframes.pandas as bpd
4203-
>>> data = {
4204-
... "timestamp_col": pd.date_range(
4205-
... start="2021-01-01 13:00:00", periods=30, freq="1s"
4206-
... ),
4207-
... "int64_col": range(30),
4208-
... "int64_too": range(10, 40),
4209-
... }
4210-
4211-
Resample on a DataFrame with index:
4212-
4213-
>>> df = bpd.DataFrame(data).set_index("timestamp_col")
4214-
>>> df._resample(rule="7s").min()
4215-
int64_col int64_too
4216-
2021-01-01 12:59:55 0 10
4217-
2021-01-01 13:00:02 2 12
4218-
2021-01-01 13:00:09 9 19
4219-
2021-01-01 13:00:16 16 26
4220-
2021-01-01 13:00:23 23 33
4221-
<BLANKLINE>
4222-
[5 rows x 2 columns]
4223-
4224-
Resample with column and origin set to 'start':
4225-
4226-
>>> df = bpd.DataFrame(data)
4227-
>>> df._resample(rule="7s", on = "timestamp_col", origin="start").min()
4228-
int64_col int64_too
4229-
2021-01-01 13:00:00 0 10
4230-
2021-01-01 13:00:07 7 17
4231-
2021-01-01 13:00:14 14 24
4232-
2021-01-01 13:00:21 21 31
4233-
2021-01-01 13:00:28 28 38
4234-
<BLANKLINE>
4235-
[5 rows x 2 columns]
4236-
4237-
Args:
4238-
rule (str):
4239-
The offset string representing target conversion.
4240-
on (str, default None):
4241-
For a DataFrame, column to use instead of index for resampling. Column
4242-
must be datetime-like.
4243-
level (str or int, default None):
4244-
For a MultiIndex, level (name or number) to use for resampling.
4245-
level must be datetime-like.
4246-
origin(str, default 'start_day'):
4247-
The timestamp on which to adjust the grouping. Must be one of the following:
4248-
'epoch': origin is 1970-01-01
4249-
'start': origin is the first value of the timeseries
4250-
'start_day': origin is the first day at midnight of the timeseries
4251-
Returns:
4252-
DataFrameGroupBy: DataFrameGroupBy object.
4253-
"""
42544200
block = self._block._generate_resample_label(
42554201
rule=rule,
4202+
closed=closed,
4203+
label=label,
42564204
on=on,
42574205
level=level,
42584206
origin=origin,
Collapse file

‎bigframes/series.py‎

Copy file name to clipboardExpand all lines: bigframes/series.py
+1-38Lines changed: 1 addition & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2505,7 +2505,7 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series:
25052505
)
25062506

25072507
@validations.requires_ordering()
2508-
def _resample(
2508+
def resample(
25092509
self,
25102510
rule: str,
25112511
*,
@@ -2519,43 +2519,6 @@ def _resample(
25192519
Literal["epoch", "start", "start_day", "end", "end_day"],
25202520
] = "start_day",
25212521
) -> bigframes.core.groupby.SeriesGroupBy:
2522-
"""Internal function to support resample. Resample time-series data.
2523-
2524-
**Examples:**
2525-
2526-
>>> import bigframes.pandas as bpd
2527-
>>> data = {
2528-
... "timestamp_col": pd.date_range(
2529-
... start="2021-01-01 13:00:00", periods=30, freq="1s"
2530-
... ),
2531-
... "int64_col": range(30),
2532-
... }
2533-
>>> s = bpd.DataFrame(data).set_index("timestamp_col")
2534-
>>> s._resample(rule="7s", origin="epoch").min()
2535-
int64_col
2536-
2021-01-01 12:59:56 0
2537-
2021-01-01 13:00:03 3
2538-
2021-01-01 13:00:10 10
2539-
2021-01-01 13:00:17 17
2540-
2021-01-01 13:00:24 24
2541-
<BLANKLINE>
2542-
[5 rows x 1 columns]
2543-
2544-
2545-
Args:
2546-
rule (str):
2547-
The offset string representing target conversion.
2548-
level (str or int, default None):
2549-
For a MultiIndex, level (name or number) to use for resampling.
2550-
level must be datetime-like.
2551-
origin(str, default 'start_day'):
2552-
The timestamp on which to adjust the grouping. Must be one of the following:
2553-
'epoch': origin is 1970-01-01
2554-
'start': origin is the first value of the timeseries
2555-
'start_day': origin is the first day at midnight of the timeseries
2556-
Returns:
2557-
SeriesGroupBy: SeriesGroupBy object.
2558-
"""
25592522
block = self._block._generate_resample_label(
25602523
rule=rule,
25612524
closed=closed,
Collapse file

‎tests/system/small/test_dataframe.py‎

Copy file name to clipboardExpand all lines: tests/system/small/test_dataframe.py
+38-20Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5915,21 +5915,15 @@ def test_dataframe_explode_xfail(col_names):
59155915
pytest.param("datetime_col", "5M", "epoch"),
59165916
pytest.param("datetime_col", "3Q", "start_day"),
59175917
pytest.param("datetime_col", "3YE", "start"),
5918-
pytest.param(
5919-
"int64_col", "100D", "start", marks=pytest.mark.xfail(raises=TypeError)
5920-
),
5921-
pytest.param(
5922-
"datetime_col", "100D", "end", marks=pytest.mark.xfail(raises=ValueError)
5923-
),
59245918
],
59255919
)
5926-
def test__resample_with_column(
5920+
def test_resample_with_column(
59275921
scalars_df_index, scalars_pandas_df_index, on, rule, origin
59285922
):
59295923
# TODO: supply a reason why this isn't compatible with pandas 1.x
59305924
pytest.importorskip("pandas", minversion="2.0.0")
59315925
bf_result = (
5932-
scalars_df_index._resample(rule=rule, on=on, origin=origin)[
5926+
scalars_df_index.resample(rule=rule, on=on, origin=origin)[
59335927
["int64_col", "int64_too"]
59345928
]
59355929
.max()
@@ -5943,30 +5937,54 @@ def test__resample_with_column(
59435937
)
59445938

59455939

5940+
@pytest.mark.parametrize("index_col", ["timestamp_col", "datetime_col"])
5941+
@pytest.mark.parametrize(
5942+
("index_append", "level"),
5943+
[(True, 1), (False, None), (False, 0)],
5944+
)
59465945
@pytest.mark.parametrize(
5947-
("append", "level", "col", "rule"),
5946+
"rule",
59485947
[
5949-
pytest.param(False, None, "timestamp_col", "100d"),
5950-
pytest.param(True, 1, "timestamp_col", "1200h"),
5951-
pytest.param(False, None, "datetime_col", "100d"),
5948+
# TODO(tswast): support timedeltas and dataoffsets.
5949+
# TODO(tswast): support bins that default to "right".
5950+
"100d",
5951+
"1200h",
59525952
],
59535953
)
5954-
def test__resample_with_index(
5955-
scalars_df_index, scalars_pandas_df_index, append, level, col, rule
5954+
# TODO(tswast): support "right"
5955+
@pytest.mark.parametrize("closed", ["left", None])
5956+
# TODO(tswast): support "right"
5957+
@pytest.mark.parametrize("label", ["left", None])
5958+
@pytest.mark.parametrize(
5959+
"origin",
5960+
["epoch", "start", "start_day"], # TODO(tswast): support end, end_day.
5961+
)
5962+
def test_resample_with_index(
5963+
scalars_df_index,
5964+
scalars_pandas_df_index,
5965+
index_append,
5966+
level,
5967+
index_col,
5968+
rule,
5969+
closed,
5970+
origin,
5971+
label,
59565972
):
59575973
# TODO: supply a reason why this isn't compatible with pandas 1.x
59585974
pytest.importorskip("pandas", minversion="2.0.0")
5959-
scalars_df_index = scalars_df_index.set_index(col, append=append)
5960-
scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)
5975+
scalars_df_index = scalars_df_index.set_index(index_col, append=index_append)
5976+
scalars_pandas_df_index = scalars_pandas_df_index.set_index(
5977+
index_col, append=index_append
5978+
)
59615979
bf_result = (
59625980
scalars_df_index[["int64_col", "int64_too"]]
5963-
._resample(rule=rule, level=level)
5981+
.resample(rule=rule, level=level, closed=closed, origin=origin, label=label)
59645982
.min()
59655983
.to_pandas()
59665984
)
59675985
pd_result = (
59685986
scalars_pandas_df_index[["int64_col", "int64_too"]]
5969-
.resample(rule=rule, level=level)
5987+
.resample(rule=rule, level=level, closed=closed, origin=origin, label=label)
59705988
.min()
59715989
)
59725990
assert_pandas_df_equal(bf_result, pd_result)
@@ -6010,15 +6028,15 @@ def test__resample_with_index(
60106028
),
60116029
],
60126030
)
6013-
def test__resample_start_time(rule, origin, data):
6031+
def test_resample_start_time(rule, origin, data):
60146032
# TODO: supply a reason why this isn't compatible with pandas 1.x
60156033
pytest.importorskip("pandas", minversion="2.0.0")
60166034
col = "timestamp_col"
60176035
scalars_df_index = bpd.DataFrame(data).set_index(col)
60186036
scalars_pandas_df_index = pd.DataFrame(data).set_index(col)
60196037
scalars_pandas_df_index.index.name = None
60206038

6021-
bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas()
6039+
bf_result = scalars_df_index.resample(rule=rule, origin=origin).min().to_pandas()
60226040

60236041
pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()
60246042

Collapse file

‎tests/system/small/test_series.py‎

Copy file name to clipboardExpand all lines: tests/system/small/test_series.py
+2-2Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4856,14 +4856,14 @@ def test_series_explode_null(data):
48564856
pytest.param(True, "timestamp_col", "timestamp_col", "1YE"),
48574857
],
48584858
)
4859-
def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
4859+
def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
48604860
# TODO: supply a reason why this isn't compatible with pandas 1.x
48614861
pytest.importorskip("pandas", minversion="2.0.0")
48624862
scalars_df_index = scalars_df_index.set_index(col, append=append)["int64_col"]
48634863
scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)[
48644864
"int64_col"
48654865
]
4866-
bf_result = scalars_df_index._resample(rule=rule, level=level).min().to_pandas()
4866+
bf_result = scalars_df_index.resample(rule=rule, level=level).min().to_pandas()
48674867
pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min()
48684868
pd.testing.assert_series_equal(bf_result, pd_result)
48694869

Collapse file

‎tests/system/small/test_unordered.py‎

Copy file name to clipboardExpand all lines: tests/system/small/test_unordered.py
+8-4Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -248,20 +248,24 @@ def test_unordered_mode_no_ambiguity_warning(unordered_session):
248248
),
249249
],
250250
)
251-
def test__resample_with_index(unordered_session, rule, origin, data):
251+
def test_resample_with_index(unordered_session, rule, origin, data):
252252
# TODO: supply a reason why this isn't compatible with pandas 1.x
253253
pytest.importorskip("pandas", minversion="2.0.0")
254254
col = "timestamp_col"
255255
scalars_df_index = bpd.DataFrame(data, session=unordered_session).set_index(col)
256256
scalars_pandas_df_index = pd.DataFrame(data).set_index(col)
257257
scalars_pandas_df_index.index.name = None
258258

259-
bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas()
260-
259+
bf_result = scalars_df_index.resample(rule=rule, origin=origin).min()
261260
pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()
262261

262+
assert isinstance(bf_result.index, bpd.DatetimeIndex)
263+
assert isinstance(pd_result.index, pd.DatetimeIndex)
263264
pd.testing.assert_frame_equal(
264-
bf_result, pd_result, check_dtype=False, check_index_type=False
265+
bf_result.to_pandas(),
266+
pd_result,
267+
check_index_type=False,
268+
check_dtype=False,
265269
)
266270

267271

Collapse file

‎tests/unit/test_dataframe.py‎

Copy file name to clipboardExpand all lines: tests/unit/test_dataframe.py
+62Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,68 @@ def test_dataframe_repr_with_uninitialized_object():
4242
assert "DataFrame" in got
4343

4444

45+
@pytest.mark.parametrize(
46+
"rule",
47+
[
48+
pd.DateOffset(weeks=1),
49+
pd.Timedelta(hours=8),
50+
# According to
51+
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html
52+
# these all default to "right" for closed and label, which isn't yet supported.
53+
"ME",
54+
"YE",
55+
"QE",
56+
"BME",
57+
"BA",
58+
"BQE",
59+
"W",
60+
],
61+
)
62+
def test_dataframe_rule_not_implememented(
63+
monkeypatch: pytest.MonkeyPatch,
64+
rule,
65+
):
66+
dataframe = mocks.create_dataframe(monkeypatch)
67+
68+
with pytest.raises(NotImplementedError, match="rule"):
69+
dataframe.resample(rule=rule)
70+
71+
72+
def test_dataframe_closed_not_implememented(
73+
monkeypatch: pytest.MonkeyPatch,
74+
):
75+
dataframe = mocks.create_dataframe(monkeypatch)
76+
77+
with pytest.raises(NotImplementedError, match="Only closed='left'"):
78+
dataframe.resample(rule="1d", closed="right")
79+
80+
81+
def test_dataframe_label_not_implememented(
82+
monkeypatch: pytest.MonkeyPatch,
83+
):
84+
dataframe = mocks.create_dataframe(monkeypatch)
85+
86+
with pytest.raises(NotImplementedError, match="Only label='left'"):
87+
dataframe.resample(rule="1d", label="right")
88+
89+
90+
@pytest.mark.parametrize(
91+
"origin",
92+
[
93+
"end",
94+
"end_day",
95+
],
96+
)
97+
def test_dataframe_origin_not_implememented(
98+
monkeypatch: pytest.MonkeyPatch,
99+
origin,
100+
):
101+
dataframe = mocks.create_dataframe(monkeypatch)
102+
103+
with pytest.raises(NotImplementedError, match="origin"):
104+
dataframe.resample(rule="1d", origin=origin)
105+
106+
45107
def test_dataframe_setattr_with_uninitialized_object():
46108
"""Ensures DataFrame can be subclassed without trying to set attributes as columns."""
47109
# Avoid calling __init__ since it might be called later in a subclass.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.