From 97e857e428c5194741e42c115cde46ffe323bf3f Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 22 Nov 2023 01:59:45 +0000 Subject: [PATCH] docs: code samples for `Series.dot` and `DataFrame.dot` --- bigframes/dataframe.py | 3 +- bigframes/operations/base.py | 2 +- tests/system/small/test_dataframe.py | 23 ++++++ .../bigframes_vendored/pandas/core/frame.py | 71 +++++++++++++++++++ .../bigframes_vendored/pandas/core/series.py | 15 ++++ 5 files changed, 112 insertions(+), 2 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 1f1275e217..8567296e29 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2797,7 +2797,8 @@ def get_right_id(id): result = result[other_frame.columns] if isinstance(other, bf_series.Series): - result = result[other.name].rename() + # There should be exactly one column in the result + result = result[result.columns[0]].rename() return result diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index d33befe4da..85ce1dd9e6 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -141,7 +141,7 @@ def _apply_binary_op( if isinstance(other, pd.Series): # TODO: Convert to BigQuery DataFrames series raise NotImplementedError( - f"Pandas series not supported supported as operand. {constants.FEEDBACK_LINK}" + f"Pandas series not supported as operand. 
{constants.FEEDBACK_LINK}" ) if isinstance(other, series.Series): (left, right, block) = self._align(other, how=alignment) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index a0cf25807c..e25e9ce501 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3493,6 +3493,29 @@ def test_df_dot_operator( ) +def test_df_dot_series_inline(): + left = [[1, 2, 3], [2, 5, 7]] + right = [2, 1, 3] + + bf1 = dataframe.DataFrame(left) + bf2 = series.Series(right) + bf_result = bf1.dot(bf2).to_pandas() + + df1 = pd.DataFrame(left) + df2 = pd.Series(right) + pd_result = df1.dot(df2) + + # Patch pandas dtypes for testing parity + # Pandas result is int64 instead of Int64 (nullable) dtype. + pd_result = pd_result.astype(pd.Int64Dtype()) + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + def test_df_dot_series( matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df ): diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index f448ad7939..339edeb7a5 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -3321,6 +3321,77 @@ def dot(self, other): The dot method for Series computes the inner product, instead of the matrix product here. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> left = bpd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + >>> left + 0 1 2 3 + 0 0 1 -2 -1 + 1 1 1 1 1 + + [2 rows x 4 columns] + >>> right = bpd.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]]) + >>> right + 0 1 + 0 0 1 + 1 1 2 + 2 -1 -1 + 3 2 0 + + [4 rows x 2 columns] + >>> left.dot(right) + 0 1 + 0 1 4 + 1 2 2 + + [2 rows x 2 columns] + + You can also use the operator ``@`` for the dot product: + + >>> left @ right + 0 1 + 0 1 4 + 1 2 2 + + [2 rows x 2 columns] + + The right input can be a Series, in which case the result will also be a + Series: + + >>> right = bpd.Series([1, 2, -1, 0]) + >>> left @ right + 0 4 + 1 2 + dtype: Int64 + + Any user-defined index of the left matrix and columns of the right + matrix will be reflected in the result. + + >>> left = bpd.DataFrame([[1, 2, 3], [2, 5, 7]], index=["alpha", "beta"]) + >>> left + 0 1 2 + alpha 1 2 3 + beta 2 5 7 + + [2 rows x 3 columns] + >>> right = bpd.DataFrame([[2, 4, 8], [1, 5, 10], [3, 6, 9]], columns=["red", "green", "blue"]) + >>> right + red green blue + 0 2 4 8 + 1 1 5 10 + 2 3 6 9 + + [3 rows x 3 columns] + >>> left.dot(right) + red green blue + alpha 13 32 55 + beta 30 75 129 + + [2 rows x 3 columns] + Args: other (Series or DataFrame): The other object to compute the matrix product with. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index a86765a412..1b751ed83b 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -631,6 +631,21 @@ def dot(self, other) -> Series | np.ndarray: BigQuery Dataframes does not validate this property and will produce incorrect results if indices are not equal. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([0, 1, 2, 3]) + >>> other = bpd.Series([-1, 2, -3, 4]) + >>> s.dot(other) + 8 + + You can also use the operator ``@`` for the dot product: + + >>> s @ other + 8 + Args: other (Series): The other object to compute the dot product with its columns.