From f530e53277fee0e91432944754bfd5084afce22c Mon Sep 17 00:00:00 2001
From: Trevor Bergeron
Date: Sat, 18 Jan 2025 01:04:27 +0000
Subject: [PATCH] feat: DataFrame.join supports Series other

---
Reviewer notes (this area between `---` and the first `diff --git` is not
part of the commit and is skipped when the patch is applied):

* bigframes/dataframe.py: `DataFrame.join` now also accepts a
  `bigframes.series.Series` as `other`; it is converted with `to_frame()`
  and then follows the pre-existing DataFrame join path unchanged.
* tests/system/small/test_dataframe.py: `test_df_join_series` runs for every
  `all_joins` value, expects `ValueError` for how="cross", and otherwise
  compares the result with pandas, ignoring row order.
* NOTE(review): pandas rejects an unnamed Series in `DataFrame.join`
  ("Other Series must have a name"); this patch adds no equivalent guard
  before `to_frame()` -- confirm whether that divergence is intended.
* NOTE(review): line breaks and indentation were reconstructed from a
  whitespace-collapsed copy and checked against the hunk line counts;
  context-line indentation should be confirmed against the target files.

 bigframes/dataframe.py                        |  9 +++++++-
 tests/system/small/test_dataframe.py          | 21 +++++++++++++++++++
 .../bigframes_vendored/pandas/core/frame.py   |  2 +-
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index e1bb885558..a4d267fcde 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -3039,8 +3039,15 @@ def merge(
         return DataFrame(block)
 
     def join(
-        self, other: DataFrame, *, on: Optional[str] = None, how: str = "left"
+        self,
+        other: Union[DataFrame, bigframes.series.Series],
+        *,
+        on: Optional[str] = None,
+        how: str = "left",
     ) -> DataFrame:
+        if isinstance(other, bigframes.series.Series):
+            other = other.to_frame()
+
         left, right = self, other
 
         if not left.columns.intersection(right.columns).empty:
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 5b94df2446..e7d6ad67e1 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -2554,6 +2554,27 @@ def test_join_param_on(scalars_dfs, how):
         assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
 
 
+@all_joins
+def test_df_join_series(scalars_dfs, how):
+    bf_df, pd_df = scalars_dfs
+
+    bf_df_a = bf_df[["string_col", "int64_col", "rowindex_2"]]
+    bf_df_a = bf_df_a.assign(rowindex_2=bf_df_a["rowindex_2"] + 2)
+    bf_series_b = bf_df["float64_col"]
+
+    if how == "cross":
+        with pytest.raises(ValueError):
+            bf_df_a.join(bf_series_b, on="rowindex_2", how=how)
+    else:
+        bf_result = bf_df_a.join(bf_series_b, on="rowindex_2", how=how).to_pandas()
+
+        pd_df_a = pd_df[["string_col", "int64_col", "rowindex_2"]]
+        pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2)
+        pd_series_b = pd_df["float64_col"]
+        pd_result = pd_df_a.join(pd_series_b, on="rowindex_2", how=how)
+        assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
+
+
 @pytest.mark.parametrize(
     ("by", "ascending", "na_position"),
     [
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index a44d6b629f..c8ca1b74b5 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -4384,7 +4384,7 @@ def join(self, other, *, on: Optional[str] = None, how: str) -> DataFrame:
 
         Args:
             other:
-                DataFrame with an Index similar to the Index of this one.
+                DataFrame or Series with an Index similar to the Index of this one.
             on:
                 Column in the caller to join on the index in other, otherwise joins index-on-index.
                 Like an Excel VLOOKUP operation.