From da78ddbb89aa554e6662414a78304aed6420b30f Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Mon, 26 Feb 2024 20:20:31 +0000 Subject: [PATCH] fix: rename cosine_similarity to paired_cosine_distances --- bigframes/ml/metrics/pairwise.py | 8 +++++--- tests/system/small/ml/test_metrics_pairwise.py | 6 +++--- .../bigframes_vendored/sklearn/metrics/pairwise.py | 11 +++-------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/bigframes/ml/metrics/pairwise.py b/bigframes/ml/metrics/pairwise.py index 04577c89d3..35b64c7850 100644 --- a/bigframes/ml/metrics/pairwise.py +++ b/bigframes/ml/metrics/pairwise.py @@ -20,7 +20,7 @@ import third_party.bigframes_vendored.sklearn.metrics.pairwise as vendored_metrics_pairwise -def cosine_similarity( +def paired_cosine_distances( X: Union[bpd.DataFrame, bpd.Series], Y: Union[bpd.DataFrame, bpd.Series] ) -> bpd.DataFrame: X, Y = utils.convert_to_dataframe(X, Y) @@ -28,7 +28,9 @@ def cosine_similarity( raise ValueError("Inputs X and Y can only contain 1 column.") base_bqml = core.BaseBqml(session=X._session) - return base_bqml.distance(X, Y, type="COSINE", name="cosine_similarity") + return base_bqml.distance(X, Y, type="COSINE", name="cosine_distance") -cosine_similarity.__doc__ = inspect.getdoc(vendored_metrics_pairwise.cosine_similarity) +paired_cosine_distances.__doc__ = inspect.getdoc( + vendored_metrics_pairwise.paired_cosine_distances +) diff --git a/tests/system/small/ml/test_metrics_pairwise.py b/tests/system/small/ml/test_metrics_pairwise.py index c02a36abbc..47bd1e18d0 100644 --- a/tests/system/small/ml/test_metrics_pairwise.py +++ b/tests/system/small/ml/test_metrics_pairwise.py @@ -19,15 +19,15 @@ import bigframes.pandas as bpd -def test_cosine_similarity(): +def test_paired_cosine_distances(): x_col = [np.array([4.1, 0.5, 1.0])] y_col = [np.array([3.0, 0.0, 2.5])] X = bpd.read_pandas(pd.DataFrame({"X": x_col})) Y = bpd.read_pandas(pd.DataFrame({"Y": y_col})) - result = metrics.pairwise.cosine_similarity(X, 
Y) + result = metrics.pairwise.paired_cosine_distances(X, Y) expected_pd_df = pd.DataFrame( - {"X": x_col, "Y": y_col, "cosine_similarity": [0.108199]} + {"X": x_col, "Y": y_col, "cosine_distance": [0.108199]} ) pd.testing.assert_frame_equal( diff --git a/third_party/bigframes_vendored/sklearn/metrics/pairwise.py b/third_party/bigframes_vendored/sklearn/metrics/pairwise.py index 3ef5431178..c309b08d88 100644 --- a/third_party/bigframes_vendored/sklearn/metrics/pairwise.py +++ b/third_party/bigframes_vendored/sklearn/metrics/pairwise.py @@ -11,13 +11,8 @@ import bigframes.pandas as bpd -def cosine_similarity(X, Y) -> bpd.DataFrame: - """Compute cosine similarity between samples in X and Y. - - Cosine similarity, or the cosine kernel, computes similarity as the - normalized dot product of X and Y: - - K(X, Y) = <X, Y> / (||X||*||Y||) +def paired_cosine_distances(X, Y) -> bpd.DataFrame: + """Compute the paired cosine distances between X and Y. Args: X (Series or single column DataFrame of array of numeric type): @@ -26,6 +21,6 @@ def cosine_similarity(X, Y) -> bpd.DataFrame: Input data. X and Y are mapped by indexes, must have the same index. Returns: - bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and cosine_similarity + bigframes.dataframe.DataFrame: DataFrame with columns of X, Y and cosine_distance """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)