Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

feat: add 'cross' join support #176

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions 1 bigframes/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ def join(
"left",
"outer",
"right",
"cross",
],
allow_row_identity_join: bool = True,
):
Expand Down
1 change: 1 addition & 0 deletions 1 bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1531,6 +1531,7 @@ def merge(
"left",
"outer",
"right",
"cross",
],
left_join_ids: typing.Sequence[str],
right_join_ids: typing.Sequence[str],
Expand Down
3 changes: 2 additions & 1 deletion 3 bigframes/core/compile/single_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def join_by_column(
"left",
"outer",
"right",
"cross",
],
allow_row_identity_join: bool = True,
) -> compiled.CompiledArrayValue:
Expand Down Expand Up @@ -107,7 +108,7 @@ def join_by_column(
left_table,
right_table,
predicates=join_conditions,
how=how,
how=how, # type: ignore
)

# Preserve ordering across joins.
Expand Down
1 change: 1 addition & 0 deletions 1 bigframes/core/joins/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def merge(
"left",
"outer",
"right",
"cross",
] = "inner",
on: Optional[str] = None,
*,
Expand Down
1 change: 1 addition & 0 deletions 1 bigframes/core/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class JoinNode(BigFrameNode):
"left",
"outer",
"right",
"cross",
]
allow_row_identity_join: bool = True

Expand Down
26 changes: 26 additions & 0 deletions 26 bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1933,6 +1933,7 @@ def merge(
"left",
"outer",
"right",
"cross",
] = "inner",
# TODO(garrettwu): Currently can take inner, outer, left and right. To support
# cross joins
Expand All @@ -1943,6 +1944,19 @@ def merge(
sort: bool = False,
suffixes: tuple[str, str] = ("_x", "_y"),
) -> DataFrame:
if how == "cross":
if on is not None:
raise ValueError("'on' is not supported for cross join.")
result_block = self._block.merge(
right._block,
left_join_ids=[],
right_join_ids=[],
suffixes=suffixes,
how=how,
sort=True,
)
return DataFrame(result_block)

if on is None:
if left_on is None or right_on is None:
raise ValueError("Must specify `on` or `left_on` + `right_on`.")
Expand Down Expand Up @@ -1996,6 +2010,18 @@ def join(
raise NotImplementedError(
f"Deduping column names is not implemented. {constants.FEEDBACK_LINK}"
)
if how == "cross":
if on is not None:
raise ValueError("'on' is not supported for cross join.")
result_block = left._block.merge(
right._block,
left_join_ids=[],
right_join_ids=[],
suffixes=("", ""),
how="cross",
sort=True,
)
return DataFrame(result_block)

# Join left columns with right index
if on is not None:
Expand Down
1 change: 1 addition & 0 deletions 1 bigframes/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ def merge(
"left",
"outer",
"right",
"cross",
] = "inner",
on: Optional[str] = None,
*,
Expand Down
44 changes: 32 additions & 12 deletions 44 tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,26 @@ def test_df_isin_dict(scalars_dfs):
pandas.testing.assert_frame_equal(bf_result, pd_result.astype("boolean"))


def test_df_cross_merge(scalars_dfs):
    """DataFrame.merge(..., "cross") must produce the same frame as pandas."""
    bf_df, pd_df = scalars_dfs
    lhs_cols = ["int64_col", "float64_col", "rowindex_2"]
    rhs_cols = ["int64_col", "bool_col", "string_col", "rowindex_2"]

    # Shift rowindex_2 on the right-hand side by 2 so the two inputs do not
    # carry identical values in that shared column.
    bf_right = bf_df[rhs_cols].assign(rowindex_2=bf_df["rowindex_2"] + 2)
    bf_result = bf_df[lhs_cols].merge(bf_right, "cross").to_pandas()

    # Build the pandas reference with exactly the same inputs.
    pd_right = pd_df[rhs_cols].assign(rowindex_2=pd_df["rowindex_2"] + 2)
    pd_result = pd_df[lhs_cols].merge(pd_right, "cross")

    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)


@pytest.mark.parametrize(
("merge_how",),
[
Expand Down Expand Up @@ -1745,12 +1765,7 @@ def test_series_binop_add_different_table(

all_joins = pytest.mark.parametrize(
("how",),
(
("outer",),
("left",),
("right",),
("inner",),
),
(("outer",), ("left",), ("right",), ("inner",), ("cross",)),
)


Expand Down Expand Up @@ -1795,13 +1810,18 @@ def test_join_param_on(scalars_dfs, how):
bf_df_a = bf_df[["string_col", "int64_col", "rowindex_2"]]
bf_df_a = bf_df_a.assign(rowindex_2=bf_df_a["rowindex_2"] + 2)
bf_df_b = bf_df[["float64_col"]]
bf_result = bf_df_a.join(bf_df_b, on="rowindex_2", how=how).to_pandas()

pd_df_a = pd_df[["string_col", "int64_col", "rowindex_2"]]
pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2)
pd_df_b = pd_df[["float64_col"]]
pd_result = pd_df_a.join(pd_df_b, on="rowindex_2", how=how)
assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
if how == "cross":
with pytest.raises(ValueError):
bf_df_a.join(bf_df_b, on="rowindex_2", how=how)
else:
bf_result = bf_df_a.join(bf_df_b, on="rowindex_2", how=how).to_pandas()

pd_df_a = pd_df[["string_col", "int64_col", "rowindex_2"]]
pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2)
pd_df_b = pd_df[["float64_col"]]
pd_result = pd_df_a.join(pd_df_b, on="rowindex_2", how=how)
assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)


@pytest.mark.parametrize(
Expand Down
21 changes: 21 additions & 0 deletions 21 tests/system/small/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,27 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how):
assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)


def test_pd_merge_cross(scalars_dfs):
    """Module-level ``bpd.merge`` with how="cross" must agree with ``pd.merge``."""
    bf_df, pd_df = scalars_dfs
    lhs_cols = ["int64_col", "float64_col", "int64_too"]
    rhs_cols = ["int64_col", "bool_col", "string_col", "rowindex_2"]

    bf_merged = bpd.merge(bf_df[lhs_cols], bf_df[rhs_cols], "cross", sort=True)
    bf_result = bf_merged.to_pandas()

    # Reference result computed by pandas from the same column selections.
    pd_result = pd.merge(pd_df[lhs_cols], pd_df[rhs_cols], "cross", sort=True)

    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)


@pytest.mark.parametrize(
("merge_how",),
[
Expand Down
5 changes: 5 additions & 0 deletions 5 third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2194,6 +2194,8 @@ def join(self, other, *, on: Optional[str] = None, how: str) -> DataFrame:
and sort it lexicographically. ``inner``: form intersection of
calling frame's index (or column if on is specified) with `other`'s
index, preserving the order of the calling's one.
``cross``: creates the cartesian product from both frames, preserves
the order of the left keys.

Returns:
bigframes.dataframe.DataFrame: A dataframe containing columns from both the caller and `other`.
Expand All @@ -2208,6 +2210,7 @@ def merge(
"left",
"outer",
"right",
"cross",
] = "inner",
on: Optional[str] = None,
*,
Expand Down Expand Up @@ -2243,6 +2246,8 @@ def merge(
join; sort keys lexicographically.
``inner``: use intersection of keys from both frames, similar to a SQL inner
join; preserve the order of the left keys.
``cross``: creates the cartesian product from both frames, preserves the order
of the left keys.

on (label or list of labels):
Columns to join on. It must be found in both DataFrames. Either on or left_on + right_on
Expand Down
2 changes: 2 additions & 0 deletions 2 third_party/bigframes_vendored/pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def merge(
join; sort keys lexicographically.
``inner``: use intersection of keys from both frames, similar to a SQL inner
join; preserve the order of the left keys.
``cross``: creates the cartesian product from both frames, preserves the order
of the left keys.

on (label or list of labels):
Columns to join on. It must be found in both DataFrames. Either on or left_on + right_on
Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.