diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 10f73d25b7..b35d0f3b2e 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2159,8 +2159,68 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame: In pandas 2.1.0, DataFrame.applymap is deprecated and renamed to DataFrame.map. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Let's use ``reuse=False`` flag to make sure a new ``remote_function`` + is created every time we run the following code, but you can skip it + to potentially reuse a previously deployed ``remote_function`` from + the same user defined function. + + >>> @bpd.remote_function([int], float, reuse=False) + ... def minutes_to_hours(x): + ... return x/60 + + >>> df_minutes = bpd.DataFrame( + ... {"system_minutes" : [0, 30, 60, 90, 120], + ... "user_minutes" : [0, 15, 75, 90, 6]}) + >>> df_minutes + system_minutes user_minutes + 0 0 0 + 1 30 15 + 2 60 75 + 3 90 90 + 4 120 6 + + [5 rows x 2 columns] + + >>> df_hours = df_minutes.map(minutes_to_hours) + >>> df_hours + system_minutes user_minutes + 0 0.0 0.0 + 1 0.5 0.25 + 2 1.0 1.25 + 3 1.5 1.5 + 4 2.0 0.1 + + [5 rows x 2 columns] + + If there are ``NA``/``None`` values in the data, you can ignore + applying the remote function on such values by specifying + ``na_action='ignore'``. + + >>> df_minutes = bpd.DataFrame( + ... { + ... "system_minutes" : [0, 30, 60, None, 90, 120, bpd.NA], + ... "user_minutes" : [0, 15, 75, 90, 6, None, bpd.NA] + ... 
}, dtype="Int64") + >>> df_hours = df_minutes.map(minutes_to_hours, na_action='ignore') + >>> df_hours + system_minutes user_minutes + 0 0.0 0.0 + 1 0.5 0.25 + 2 1.0 1.25 + 3 <NA> 1.5 + 4 1.5 0.1 + 5 2.0 <NA> + 6 <NA> <NA> + + [7 rows x 2 columns] + Args: - func: + func (function): Python function wrapped by ``remote_function`` decorator, returns a single value from a single value. na_action (Optional[str], default None): diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index b25b0c75cf..c6d98075f5 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -728,18 +728,74 @@ def apply( func, ) -> DataFrame | Series: """ - Invoke function on values of Series. + Invoke function on values of a Series. - Can be ufunc (a NumPy function that applies to the entire Series) - or a Python function that only works on single values. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Let's use ``reuse=False`` flag to make sure a new ``remote_function`` + is created every time we run the following code, but you can skip it + to potentially reuse a previously deployed ``remote_function`` from + the same user defined function. + + >>> @bpd.remote_function([int], float, reuse=False) + ... def minutes_to_hours(x): + ... return x/60 + + >>> minutes = bpd.Series([0, 30, 60, 90, 120]) + >>> minutes + 0 0 + 1 30 + 2 60 + 3 90 + 4 120 + dtype: Int64 + + >>> hours = minutes.apply(minutes_to_hours) + >>> hours + 0 0.0 + 1 0.5 + 2 1.0 + 3 1.5 + 4 2.0 + dtype: Float64 + + You could turn a user defined function with external package + dependencies into a BigQuery DataFrames remote function. You would + provide the names of the packages via ``packages`` param. + + >>> @bpd.remote_function( + ... [str], + ... str, + ... reuse=False, + ... packages=["cryptography"], + ... ) + ... def get_hash(input): + ...
from cryptography.fernet import Fernet + ... + ... # handle missing value + ... if input is None: + ... input = "" + ... + ... key = Fernet.generate_key() + ... f = Fernet(key) + ... return f.encrypt(input.encode()).decode() + + >>> names = bpd.Series(["Alice", "Bob"]) + >>> hashes = names.apply(get_hash) Args: func (function): - Python function or NumPy ufunc to apply. + BigQuery DataFrames ``remote_function`` to apply. The function + should take a scalar and return a scalar. It will be applied to + every element in the ``Series``. Returns: - bigframes.series.Series: If func returns a Series object the result - will be a DataFrame. + bigframes.series.Series: A new Series with values representing the + return value of the ``func`` applied to each element of the original + Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)