From a280ff18083232fb02f42a4582c7428297e2dd26 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Tue, 9 Apr 2024 23:30:17 +0000
Subject: [PATCH 1/6] feat: Add hasnans, combine_first, update to Series

---
 bigframes/core/convert.py                     |   4 +-
 bigframes/series.py                           |  21 ++-
 tests/system/small/test_series.py             |  50 +++++++
 .../bigframes_vendored/pandas/core/series.py  | 138 ++++++++++++++++++
 4 files changed, 211 insertions(+), 2 deletions(-)

diff --git a/bigframes/core/convert.py b/bigframes/core/convert.py
index 98f854ad72..268460298a 100644
--- a/bigframes/core/convert.py
+++ b/bigframes/core/convert.py
@@ -13,13 +13,15 @@
 # limitations under the License.
 from __future__ import annotations
 
+from typing import Optional
+
 import pandas as pd
 
 import bigframes.core.indexes as index
 import bigframes.series as series
 
 
-def to_bf_series(obj, default_index: index.Index) -> series.Series:
+def to_bf_series(obj, default_index: Optional[index.Index]) -> series.Series:
     if isinstance(obj, series.Series):
         return obj
     if isinstance(obj, pd.Series):
diff --git a/bigframes/series.py b/bigframes/series.py
index 185891bc01..2b5abb82cd 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -22,7 +22,7 @@
 import os
 import textwrap
 import typing
-from typing import Any, Literal, Mapping, Optional, Tuple, Union
+from typing import Any, Literal, Mapping, Optional, Sequence, Tuple, Union
 
 import bigframes_vendored.pandas.core.series as vendored_pandas_series
 import google.cloud.bigquery as bigquery
@@ -130,6 +130,11 @@ def ndim(self) -> int:
     def empty(self) -> bool:
         return self.shape[0] == 0
 
+    @property
+    def hasnans(self) -> bool:
+        # Note: this is a null check; NaN values in nullable floats do not count.
+        return self.isnull().any()
+
     @property
     def values(self) -> numpy.ndarray:
         return self.to_numpy()
@@ -753,6 +758,20 @@ def __matmul__(self, other):
 
     dot = __matmul__
 
+    def combine_first(self, other: Series) -> Series:
+        result = self._apply_binary_op(other, ops.coalesce_op)
+        result.name = self.name
+        return result
+
+    def update(self, other: Union[Series | Sequence | Mapping]) -> None:
+        import bigframes.core.convert
+
+        other = bigframes.core.convert.to_bf_series(other, default_index=None)
+        result = self._apply_binary_op(
+            other, ops.coalesce_op, reverse=True, alignment="left"
+        )
+        self._set_block(result._get_block())
+
     def abs(self) -> Series:
         return self._apply_unary_op(ops.abs_op)
 
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index c882677508..8436ce625e 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -1261,6 +1261,38 @@ def test_binop_right_filtered(scalars_dfs):
     )
 
 
+def test_series_combine_first(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    int64_col = scalars_df["int64_col"].head(7)
+    float64_col = scalars_df["float64_col"].tail(7)
+    bf_result = int64_col.combine_first(float64_col).to_pandas()
+
+    pd_int64_col = scalars_pandas_df["int64_col"].head(7)
+    pd_float64_col = scalars_pandas_df["float64_col"].tail(7)
+    pd_result = pd_int64_col.combine_first(pd_float64_col)
+
+    assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_series_update(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    int64_col = scalars_df["int64_col"].head(7)
+    float64_col = scalars_df["float64_col"].tail(7)
+    float64_col.update(int64_col)
+
+    pd_int64_col = scalars_pandas_df["int64_col"].head(7)
+    pd_float64_col = scalars_pandas_df["float64_col"].tail(7)
+    pd_float64_col.update(pd_int64_col)
+
+    assert_series_equal(
+        float64_col.to_pandas(),
+        pd_float64_col,
+    )
+
+
 def test_mean(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "int64_col"
@@ -1649,6 +1681,24 @@ def test_size(scalars_dfs):
     assert pd_result == bf_result
 
 
+def test_series_hasnans_true(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df["string_col"].hasnans
+    pd_result = scalars_pandas_df["string_col"].hasnans
+
+    assert pd_result == bf_result
+
+
+def test_series_hasnans_false(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df["string_col"].dropna().hasnans
+    pd_result = scalars_pandas_df["string_col"].dropna().hasnans
+
+    assert pd_result == bf_result
+
+
 def test_empty_false(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
 
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index a75d6c2167..c6a806f8e0 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -175,6 +175,31 @@ def name(self) -> Hashable:
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    @property
+    def hasnans(self) -> bool:
+        """
+        Return True if there are any NaNs.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s = bpd.Series([1, 2, 3, None])
+            >>> s
+            0     1.0
+            1     2.0
+            2     3.0
+            3    <NA>
+            dtype: Float64
+            >>> s.hasnans
+            True
+
+        Returns:
+            bool
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     @property
     def T(self) -> Series:
         """Return the transpose, which is by definition self.
@@ -2343,6 +2368,119 @@ def rdivmod(self, other) -> Series:
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def combine_first(self, other) -> Series:
+        """
+        Update null elements with value in the same location in 'other'.
+
+        Combine two Series objects by filling null values in one Series with
+        non-null values from the other Series. Result index will be the union
+        of the two indexes.
+
+        **Examples:**
+            >>> import bigframes.pandas as bpd
+            >>> import numpy as np
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s1 = bpd.Series([1, np.nan])
+            >>> s2 = bpd.Series([3, 4, 5])
+            >>> s1.combine_first(s2)
+            0    1.0
+            1    4.0
+            2    5.0
+            dtype: Float64
+
+            Null values still persist if the location of that null value
+            does not exist in `other`
+
+            >>> s1 = bpd.Series({'falcon': np.nan, 'eagle': 160.0})
+            >>> s2 = bpd.Series({'eagle': 200.0, 'duck': 30.0})
+            >>> s1.combine_first(s2)
+            falcon     <NA>
+            eagle     160.0
+            duck       30.0
+            dtype: Float64
+
+        Args:
+            other (Series):
+                The value(s) to be used for filling null values.
+
+        Returns:
+            Series: The result of combining the provided Series with the other object.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def update(self, other: Series | Sequence | Mapping) -> None:
+        """
+        Modify Series in place using values from passed Series.
+
+        Uses non-NA values from passed Series to make updates. Aligns
+        on index.
+
+        **Examples:**
+            >>> import bigframes.pandas as bpd
+            >>> import pandas as pd
+            >>> import numpy as np
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s = bpd.Series([1, 2, 3])
+            >>> s.update(bpd.Series([4, 5, 6]))
+            >>> s
+            0    4
+            1    5
+            2    6
+            dtype: Int64
+
+            >>> s = bpd.Series(['a', 'b', 'c'])
+            >>> s.update(bpd.Series(['d', 'e'], index=[0, 2]))
+            >>> s
+            0    d
+            1    b
+            2    e
+            dtype: string
+
+            >>> s = bpd.Series([1, 2, 3])
+            >>> s.update(bpd.Series([4, 5, 6, 7, 8]))
+            >>> s
+            0    4
+            1    5
+            2    6
+            dtype: Int64
+
+            If ``other`` contains NaNs the corresponding values are not updated
+            in the original Series.
+
+            >>> s = bpd.Series([1, 2, 3])
+            >>> s.update(bpd.Series([4, np.nan, 6], dtype=pd.Int64Dtype()))
+            >>> s
+            0    4
+            1    2
+            2    6
+            dtype: Int64
+
+            ``other`` can also be a non-Series object type
+            that is coercible into a Series
+
+            >>> s = bpd.Series([1, 2, 3])
+            >>> s.update([4, np.nan, 6])
+            >>> s
+            0    4.0
+            1    2.0
+            2    6.0
+            dtype: Float64
+
+            >>> s = bpd.Series([1, 2, 3])
+            >>> s.update({1: 9})
+            >>> s
+            0    1
+            1    9
+            2    3
+            dtype: Int64
+
+        Args:
+            other (Series, or object coercible into Series): Non-NA values to apply.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def all(
         self,
     ):

From 9f8b033aa6e256d44e110144b4cbd580238dcd2a Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Wed, 10 Apr 2024 00:12:50 +0000
Subject: [PATCH 2/6] fix type annotation for series.update

---
 bigframes/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/series.py b/bigframes/series.py
index 2b5abb82cd..b975979eaf 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -763,7 +763,7 @@ def combine_first(self, other: Series) -> Series:
         result.name = self.name
         return result
 
-    def update(self, other: Union[Series | Sequence | Mapping]) -> None:
+    def update(self, other: Union[Series, Sequence, Mapping]) -> None:
         import bigframes.core.convert
 
         other = bigframes.core.convert.to_bf_series(other, default_index=None)

From 4bac86232873f48f03043345a980e373e2ce5760 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Wed, 10 Apr 2024 00:33:20 +0000
Subject: [PATCH 3/6] fix type annotation

---
 third_party/bigframes_vendored/pandas/core/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index c6a806f8e0..572f29ff17 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -2409,7 +2409,7 @@ def combine_first(self, other) -> Series:
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def update(self, other: Series | Sequence | Mapping) -> None:
+    def update(self, other) -> None:
         """
         Modify Series in place using values from passed Series.
 

From c6115ad22568e05d4d3b893cb891b1272ef283c4 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Wed, 10 Apr 2024 16:51:57 +0000
Subject: [PATCH 4/6] skip combine_first test for legacy pandas

---
 tests/system/small/test_series.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 8436ce625e..d995ceed67 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -1261,6 +1261,7 @@ def test_binop_right_filtered(scalars_dfs):
     )
 
 
+@skip_legacy_pandas
 def test_series_combine_first(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     int64_col = scalars_df["int64_col"].head(7)

From b99f0d660386c66bf222f7b2252a6d00d63fca29 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Wed, 10 Apr 2024 20:36:18 +0000
Subject: [PATCH 5/6] add docstrings to conversion utils

---
 bigframes/core/convert.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/bigframes/core/convert.py b/bigframes/core/convert.py
index 268460298a..1ef329b0c7 100644
--- a/bigframes/core/convert.py
+++ b/bigframes/core/convert.py
@@ -22,6 +22,18 @@
 
 
 def to_bf_series(obj, default_index: Optional[index.Index]) -> series.Series:
+    """
+    Convert an object to a bigframes Series.
+
+    Args:
+        obj (list-like or Series):
+            Object to convert to bigframes Series
+        default_index (list-like or Index or None):
+            Index to use if obj has no index
+
+    Returns:
+        bigframes.pandas.Series
+    """
     if isinstance(obj, series.Series):
         return obj
     if isinstance(obj, pd.Series):
@@ -37,6 +49,18 @@ def to_bf_series(obj, default_index: Optional[index.Index]) -> series.Series:
 
 
 def to_pd_series(obj, default_index: pd.Index) -> pd.Series:
+    """
+    Convert an object to a pandas Series.
+
+    Args:
+        obj (list-like or Series):
+            Object to convert to pandas Series
+        default_index (list-like or Index or None):
+            Index to use if obj has no index
+
+    Returns:
+        pandas.Series
+    """
     if isinstance(obj, series.Series):
         return obj.to_pandas()
     if isinstance(obj, pd.Series):

From e74df0b33a922f76509b79eb811feffe4c03fc68 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Wed, 10 Apr 2024 22:38:51 +0000
Subject: [PATCH 6/6] fix update test side effect

---
 tests/system/small/test_series.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index d995ceed67..c93af1bf2f 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -1281,11 +1281,11 @@ def test_series_combine_first(scalars_dfs):
 def test_series_update(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     int64_col = scalars_df["int64_col"].head(7)
-    float64_col = scalars_df["float64_col"].tail(7)
+    float64_col = scalars_df["float64_col"].tail(7).copy()
     float64_col.update(int64_col)
 
     pd_int64_col = scalars_pandas_df["int64_col"].head(7)
-    pd_float64_col = scalars_pandas_df["float64_col"].tail(7)
+    pd_float64_col = scalars_pandas_df["float64_col"].tail(7).copy()
     pd_float64_col.update(pd_int64_col)
 
     assert_series_equal(