From 229b07a5f447562cb622cf0a2a69e6afe1b7659a Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 22 Dec 2023 02:59:21 +0000 Subject: [PATCH 1/2] docs: code samples for `isna`, `isnull`, `dropna`, `isin` --- .../bigframes_vendored/pandas/core/frame.py | 81 +++++++++++++++++++ .../bigframes_vendored/pandas/core/generic.py | 65 +++++++++++++++ .../bigframes_vendored/pandas/core/series.py | 70 ++++++++++++++++ 3 files changed, 216 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 427e586c52..70c0ad483b 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1202,9 +1202,57 @@ def duplicated(self, subset=None, keep="first"): def dropna( self, + *, + axis: int | str = 0, + how: str = "any", + ignore_index=False, ) -> DataFrame: """Remove missing values. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'], + ... "toy": [np.nan, 'Batmobile', 'Bullwhip'], + ... "born": [bpd.NA, "1940-04-25", bpd.NA]}) + >>> df + name toy born + 0 Alfred <NA> <NA> + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip <NA> + + [3 rows x 3 columns] + + Drop the rows where at least one element is missing: + + >>> df.dropna() + name toy born + 1 Batman Batmobile 1940-04-25 + + [1 rows x 3 columns] + + Drop the columns where at least one element is missing. 
+ + >>> df.dropna(axis='columns') + name + 0 Alfred + 1 Batman + 2 Catwoman + + [3 rows x 1 columns] + + Drop the rows where all elements are missing: + + >>> df.dropna(how='all') + name toy born + 0 Alfred <NA> <NA> + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip <NA> + + [3 rows x 3 columns] + Args: axis ({0 or 'index', 1 or 'columns'}, default 'columns'): Determine if rows or columns which contain missing values are @@ -1231,6 +1279,39 @@ def isin(self, values): """ Whether each element in the DataFrame is contained in values. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]}, + ... index=['falcon', 'dog']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + + [2 rows x 2 columns] + + When ``values`` is a list check whether every value in the DataFrame is + present in the list (which animals have 0 or 2 legs or wings). + + >>> df.isin([0, 2]) + num_legs num_wings + falcon True True + dog False True + + [2 rows x 2 columns] + + When ``values`` is a dict, we can pass it to check for each column separately: + + >>> df.isin({'num_wings': [0, 3]}) + num_legs num_wings + falcon False False + dog False True + + [2 rows x 2 columns] + Args: values (iterable, or dict): The result will only be true at a location if all the diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index ca5c6344ce..2885162fd6 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -499,6 +499,71 @@ def isna(self) -> NDFrame: False values. Characters such as empty strings ``''`` or :attr:`numpy.inf` are not considered NA values. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> import numpy as np + + >>> df = bpd.DataFrame(dict( + ... age=[5, 6, np.nan], + ... 
born=[bpd.NA, "1940-04-25", "1940-04-25"], + ... name=['Alfred', 'Batman', ''], + ... toy=[None, 'Batmobile', 'Joker'], + ... )) + >>> df + age born name toy + 0 5.0 <NA> Alfred <NA> + 1 6.0 1940-04-25 Batman Batmobile + 2 <NA> 1940-04-25 Joker + + [3 rows x 4 columns] + + Show which entries in a DataFrame are NA: + + >>> df.isna() + age born name toy + 0 False True False True + 1 False False False False + 2 True False False False + + [3 rows x 4 columns] + + >>> df.isnull() + age born name toy + 0 False True False True + 1 False False False False + 2 True False False False + + [3 rows x 4 columns] + + Show which entries in a Series are NA: + + >>> ser = bpd.Series([5, None, 6, np.nan, bpd.NA]) + >>> ser + 0 5.0 + 1 <NA> + 2 6.0 + 3 <NA> + 4 <NA> + dtype: Float64 + + >>> ser.isna() + 0 False + 1 True + 2 False + 3 True + 4 True + dtype: boolean + + >>> ser.isnull() + 0 False + 1 True + 2 False + 3 True + 4 True + dtype: boolean + Returns: Mask of bool values for each element that indicates whether an element is an NA value. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 01cc3a0500..8a29878509 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -1350,6 +1350,42 @@ def dropna(self, *, axis=0, inplace: bool = False, how=None) -> Series: """ Return a new Series with missing values removed. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Drop NA values from a Series: + + >>> ser = bpd.Series([1., 2., np.nan]) + >>> ser + 0 1.0 + 1 2.0 + 2 <NA> + dtype: Float64 + + >>> ser.dropna() + 0 1.0 + 1 2.0 + dtype: Float64 + + Empty strings are not considered NA values. ``None`` is considered an NA value. 
+ + >>> ser = bpd.Series(['2', bpd.NA, '', None, 'I stay'], dtype='object') + >>> ser + 0 2 + 1 <NA> + 2 + 3 <NA> + 4 I stay + dtype: string + + >>> ser.dropna() + 0 2 + 2 + 4 I stay + dtype: string + Args: axis (0 or 'index'): Unused. Parameter needed for compatibility with DataFrame. @@ -2421,6 +2457,40 @@ def isin(self, values): the same. That is, if any form of NaN is present in values, all forms of NaN in the series will be considered a match. (though pandas may not) + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', + ... 'hippo'], name='animal') + >>> s + 0 llama + 1 cow + 2 llama + 3 beetle + 4 llama + 5 hippo + Name: animal, dtype: string + + >>> s.isin(['cow', 'llama']) + 0 True + 1 True + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: boolean + + Strings and integers are distinct and are therefore not comparable: + + >>> bpd.Series([1]).isin(['1']) + 0 False + dtype: boolean + >>> bpd.Series([1.1]).isin(['1.1']) + 0 False + dtype: boolean + Args: values (list-like): The sequence of values to test. Passing in a single string will raise a From 40bbfa580ce469c9890785ee4cc619af32b1ec13 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 22 Dec 2023 03:19:23 +0000 Subject: [PATCH 2/2] fix header alignment in rendering --- third_party/bigframes_vendored/pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index ba8007c77c..2de63b9103 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1305,7 +1305,7 @@ def dropna( ... "toy": [np.nan, 'Batmobile', 'Bullwhip'], ... 
"born": [bpd.NA, "1940-04-25", bpd.NA]}) >>> df - name toy born + name toy born 0 Alfred <NA> <NA> 1 Batman Batmobile 1940-04-25 2 Catwoman Bullwhip <NA> @@ -1315,7 +1315,7 @@ def dropna( Drop the rows where at least one element is missing: >>> df.dropna() - name toy born + name toy born 1 Batman Batmobile 1940-04-25 [1 rows x 3 columns] @@ -1323,7 +1323,7 @@ def dropna( Drop the columns where at least one element is missing. >>> df.dropna(axis='columns') - name + name 0 Alfred 1 Batman 2 Catwoman @@ -1333,7 +1333,7 @@ def dropna( Drop the rows where all elements are missing: >>> df.dropna(how='all') - name toy born + name toy born 0 Alfred <NA> <NA> 1 Batman Batmobile 1940-04-25 2 Catwoman Bullwhip <NA>