From a91823f449dba3c9e04ab47fe99e66e8b2f021bd Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Wed, 8 Nov 2023 01:05:21 +0000
Subject: [PATCH 1/3] docs: Add docstring code samples for `Series.apply` and
 `DataFrame.map`

---
 .../bigframes_vendored/pandas/core/frame.py   | 57 ++++++++++++++++-
 .../bigframes_vendored/pandas/core/series.py  | 64 +++++++++++++++++--
 2 files changed, 114 insertions(+), 7 deletions(-)
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 6f4f6be35d..022fb329ab 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -2159,8 +2159,63 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame:
            In pandas 2.1.0, DataFrame.applymap is deprecated and renamed to
            DataFrame.map.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> @bpd.remote_function([int], float)
+            ... def minutes_to_hours(x):
+            ...     return x/60
+
+            >>> df_minutes = bpd.DataFrame(
+            ...     {"system_minutes" : [0, 30, 60, 90, 120],
+            ...      "user_minutes" : [0, 15, 75, 90, 6]})
+            >>> df_minutes
+            system_minutes  user_minutes
+            0               0             0
+            1              30            15
+            2              60            75
+            3              90            90
+            4             120             6
+            <BLANKLINE>
+            [5 rows x 2 columns]
+
+            >>> df_hours = df_minutes.map(minutes_to_hours)
+            >>> df_hours
+            system_minutes  user_minutes
+            0             0.0           0.0
+            1             0.5          0.25
+            2             1.0          1.25
+            3             1.5           1.5
+            4             2.0           0.1
+            <BLANKLINE>
+            [5 rows x 2 columns]
+
+            If there are ``NA``/``None`` values in the data, you can ignore
+            applying the remote function on such values by specifying
+            ``na_action='ignore'``.
+
+            >>> df_minutes = bpd.DataFrame(
+            ...     {
+            ...         "system_minutes" : [0, 30, 60, None, 90, 120, bpd.NA],
+            ...         "user_minutes" : [0, 15, 75, 90, 6, None, bpd.NA]
+            ...     }, dtype="Int64")
+            >>> df_hours = df_minutes.map(minutes_to_hours, na_action='ignore')
+            >>> df_hours
+            system_minutes  user_minutes
+            0             0.0           0.0
+            1             0.5          0.25
+            2             1.0          1.25
+            3            <NA>           1.5
+            4             1.5           0.1
+            5             2.0          <NA>
+            6            <NA>          <NA>
+            <BLANKLINE>
+            [7 rows x 2 columns]
+
         Args:
-            func:
+            func (function):
                 Python function wrapped by ``remote_function`` decorator,
                 returns a single value from a single value.
             na_action (Optional[str], default None):
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index b569e5699c..c9762b9def 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -728,18 +728,70 @@ def apply(
         func,
     ) -> DataFrame | Series:
         """
-        Invoke function on values of Series.
+        Invoke function on values of a Series.
 
-        Can be ufunc (a NumPy function that applies to the entire Series)
-        or a Python function that only works on single values.
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> @bpd.remote_function([int], float)
+            ... def minutes_to_hours(x):
+            ...     return x/60
+
+            >>> minutes = bpd.Series([0, 30, 60, 90, 120])
+            >>> minutes
+            0      0
+            1     30
+            2     60
+            3     90
+            4    120
+            dtype: Int64
+
+            >>> hours = minutes.apply(minutes_to_hours)
+            >>> hours
+            0    0.0
+            1    0.5
+            2    1.0
+            3    1.5
+            4    2.0
+            dtype: Float64
+
+            You could turn a user defined function with external package
+            dependencies into a BigQuery DataFrames remote function. You would
+            provide the names of the packages via ``packages`` param.
+
+            >>> @bpd.remote_function(
+            ...     [str],
+            ...     str,
+            ...     bigquery_connection="bigframes-rf-conn",
+            ...     reuse=False,
+            ...     packages=["cryptography"],
+            ... )
+            ... def get_hash(input):
+            ...     from cryptography.fernet import Fernet
+            ...
+            ...     # handle missing value
+            ...     if input is None:
+            ...         input = ""
+            ...
+            ...     key = Fernet.generate_key()
+            ...     f = Fernet(key)
+            ...     return f.encrypt(input.encode()).decode()
+
+            >>> names = bpd.Series(["Alice", "Bob"])
+            >>> hashes = names.apply(get_hash)
 
         Args:
             func (function):
-                Python function or NumPy ufunc to apply.
+                BigQuery DataFrames ``remote_function`` to apply. The function
+                should take a scalar and return a scalar. It will be applied to
+                every element in the ``Series``.
 
         Returns:
-            bigframes.series.Series: If func returns a Series object the result
-                will be a DataFrame.
+            bigframes.series.Series: A new Series with values representing the
+            return value of the ``func`` applied to each element of the original
+            Series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 

From 7e14da38d2ecf01b7d0a19939e623b9bc5244661 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Wed, 8 Nov 2023 06:50:36 +0000
Subject: [PATCH 2/3] improved docstring with concurrency-safe code samples

---
 third_party/bigframes_vendored/pandas/core/frame.py  | 7 ++++++-
 third_party/bigframes_vendored/pandas/core/series.py | 8 ++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 022fb329ab..783f2d286e 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -2164,7 +2164,12 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame:
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
 
-            >>> @bpd.remote_function([int], float)
+            Let's use ``reuse=False`` flag to make sure a new ``remote_function``
+            is created every time we run the following code, but you can skip it
+            to potentially reuse a previously deployed ``remote_function`` from
+            the same user defined function.
+
+            >>> @bpd.remote_function([int], float, reuse=False)
             ... def minutes_to_hours(x):
             ...     return x/60
 
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index c9762b9def..0cab57cfc6 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -735,7 +735,12 @@ def apply(
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
 
-            >>> @bpd.remote_function([int], float)
+            Let's use ``reuse=False`` flag to make sure a new ``remote_function``
+            is created every time we run the following code, but you can skip it
+            to potentially reuse a previously deployed ``remote_function`` from
+            the same user defined function.
+
+            >>> @bpd.remote_function([int], float, reuse=False)
             ... def minutes_to_hours(x):
             ...     return x/60
 
@@ -764,7 +769,6 @@ def apply(
             >>> @bpd.remote_function(
             ...     [str],
             ...     str,
-            ...     bigquery_connection="bigframes-rf-conn",
             ...     reuse=False,
             ...     packages=["cryptography"],
             ... )

From d89b3e5b3b8d58a51445c88a53ba879463b8fc23 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Thu, 9 Nov 2023 00:15:21 +0000
Subject: [PATCH 3/3] Correct indentation of text in code samples

---
 .../bigframes_vendored/pandas/core/frame.py        | 14 +++++++-------
 .../bigframes_vendored/pandas/core/series.py       | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 783f2d286e..088e226c20 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -2164,10 +2164,10 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame:
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
 
-            Let's use ``reuse=False`` flag to make sure a new ``remote_function``
-            is created every time we run the following code, but you can skip it
-            to potentially reuse a previously deployed ``remote_function`` from
-            the same user defined function.
+        Let's use the ``reuse=False`` flag to make sure a new
+        ``remote_function`` is created every time we run the following code.
+        You can omit it to potentially reuse a previously deployed
+        ``remote_function`` from the same user-defined function.
 
             >>> @bpd.remote_function([int], float, reuse=False)
             ... def minutes_to_hours(x):
@@ -2197,9 +2197,9 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame:
             <BLANKLINE>
             [5 rows x 2 columns]
 
-            If there are ``NA``/``None`` values in the data, you can ignore
-            applying the remote function on such values by specifying
-            ``na_action='ignore'``.
+        If there are ``NA``/``None`` values in the data, you can skip
+        applying the remote function to such values by specifying
+        ``na_action='ignore'``.
 
             >>> df_minutes = bpd.DataFrame(
             ...     {
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 0cab57cfc6..55c53fd1eb 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -735,10 +735,10 @@ def apply(
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
 
-            Let's use ``reuse=False`` flag to make sure a new ``remote_function``
-            is created every time we run the following code, but you can skip it
-            to potentially reuse a previously deployed ``remote_function`` from
-            the same user defined function.
+        Let's use the ``reuse=False`` flag to make sure a new
+        ``remote_function`` is created every time we run the following code.
+        You can omit it to potentially reuse a previously deployed
+        ``remote_function`` from the same user-defined function.
 
             >>> @bpd.remote_function([int], float, reuse=False)
             ... def minutes_to_hours(x):
@@ -762,9 +762,9 @@ def apply(
             4    2.0
             dtype: Float64
 
-            You could turn a user defined function with external package
-            dependencies into a BigQuery DataFrames remote function. You would
-            provide the names of the packages via ``packages`` param.
+        You can turn a user-defined function with external package
+        dependencies into a BigQuery DataFrames remote function by
+        providing the names of the packages via the ``packages`` parameter.
 
             >>> @bpd.remote_function(
             ...     [str],