From 11f3d4a4d9ac96c98b28233443806e73bb5909f6 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Mon, 27 Nov 2023 19:39:44 +0000 Subject: [PATCH 1/5] docs: add examples for dataframe.kurt, dataframe.std, dataframe.count --- .../bigframes_vendored/pandas/core/frame.py | 82 ++++++++++++++++++- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index b771be3041..bece91ff15 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2830,11 +2830,36 @@ def skew(self, *, numeric_only: bool = False): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def kurt(self, *, numeric_only: bool = False): - """Return unbiased kurtosis over requested axis. + """Return unbiased kurtosis over columns. Kurtosis obtained using Fisher's definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 2, 3, 4, 5], + ... "B": [3, 4, 3, 2, 1], + ... "C": [2, 2, 3, 2, 2]}) + A B C + 0 1 3 2 + 1 2 4 2 + 2 3 3 3 + 3 4 2 2 + 4 5 1 2 + + [5 rows x 3 columns] + + Calculating the kurtosis value of each column. + + >>> df.kurt() + A -1.2 + B -0.177515 + C 5.0 + dtype: Float64 + Args: numeric_only (bool, default False): Include only float, int, boolean columns. @@ -2845,10 +2870,35 @@ def kurt(self, *, numeric_only: bool = False): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def std(self, *, numeric_only: bool = False): - """Return sample standard deviation over requested axis. + """Return sample standard deviation over columns. Normalized by N-1 by default. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 2, 3, 4, 5], + ... "B": [3, 4, 3, 2, 1], + ... 
"C": [2, 2, 3, 2, 2]}) + A B C + 0 1 3 2 + 1 2 4 2 + 2 3 3 3 + 3 4 2 2 + 4 5 1 2 + + [5 rows x 3 columns] + + Calculating the standard deviation of each column. + + >>> df.std() + A 1.581139 + B 1.140175 + C 0.447214 + dtype: Float64 + Args: numeric_only (bool. default False): Default False. Include only float, int, boolean columns. @@ -2860,7 +2910,33 @@ def std(self, *, numeric_only: bool = False): def count(self, *, numeric_only: bool = False): """ - Count non-NA cells for each column or row. + Count non-NA cells for each column. + + Examples: + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, None, 3, 4, 5], + ... "B": [1, 2, 3, 4, 5], + ... "C": [None, 3.5, None, 4.5, 5.0]}) + >>> df + A B C + 0 1.0 1 + 1 2 3.5 + 2 3.0 3 + 3 4.0 4 4.5 + 4 5.0 5 5.0 + + [5 rows x 3 columns] + + Counting non-NA values for each column. + + >>> df.count() + A 4.0 + B 5.0 + C 3.0 + dtype: Float64 The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending on `pandas.options.mode.use_inf_as_na`) are considered NA. From 0ebfe181c7fa38c119a57d294942412f26efe648 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Mon, 27 Nov 2023 19:44:42 +0000 Subject: [PATCH 2/5] update count example --- .../bigframes_vendored/pandas/core/frame.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index bece91ff15..0b986f85ec 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2914,29 +2914,29 @@ def count(self, *, numeric_only: bool = False): Examples: - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - - >>> df = bpd.DataFrame({"A": [1, None, 3, 4, 5], - ... "B": [1, 2, 3, 4, 5], - ... 
"C": [None, 3.5, None, 4.5, 5.0]}) - >>> df - A B C - 0 1.0 1 - 1 2 3.5 - 2 3.0 3 - 3 4.0 4 4.5 - 4 5.0 5 5.0 - - [5 rows x 3 columns] + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, None, 3, 4, 5], + ... "B": [1, 2, 3, 4, 5], + ... "C": [None, 3.5, None, 4.5, 5.0]}) + >>> df + A B C + 0 1.0 1 + 1 2 3.5 + 2 3.0 3 + 3 4.0 4 4.5 + 4 5.0 5 5.0 + + [5 rows x 3 columns] Counting non-NA values for each column. - >>> df.count() - A 4.0 - B 5.0 - C 3.0 - dtype: Float64 + >>> df.count() + A 4.0 + B 5.0 + C 3.0 + dtype: Float64 The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending on `pandas.options.mode.use_inf_as_na`) are considered NA. From 8c57264ee8c9dea0c647c8750997d57f9b656b12 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Mon, 27 Nov 2023 19:51:24 +0000 Subject: [PATCH 3/5] update count example --- third_party/bigframes_vendored/pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 0b986f85ec..c3e2ffb3d0 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2912,7 +2912,7 @@ def count(self, *, numeric_only: bool = False): """ Count non-NA cells for each column. 
- Examples: + **Examples:** >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None From cc987a3be0c51b543e0dae8ad4f3949f2ae1eb8e Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Mon, 27 Nov 2023 20:00:18 +0000 Subject: [PATCH 4/5] update examples --- third_party/bigframes_vendored/pandas/core/frame.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index c3e2ffb3d0..2b3d50891e 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2843,6 +2843,7 @@ def kurt(self, *, numeric_only: bool = False): >>> df = bpd.DataFrame({"A": [1, 2, 3, 4, 5], ... "B": [3, 4, 3, 2, 1], ... "C": [2, 2, 3, 2, 2]}) + >>> df A B C 0 1 3 2 1 2 4 2 @@ -2882,6 +2883,7 @@ def std(self, *, numeric_only: bool = False): >>> df = bpd.DataFrame({"A": [1, 2, 3, 4, 5], ... "B": [3, 4, 3, 2, 1], ... "C": [2, 2, 3, 2, 2]}) + >>> df A B C 0 1 3 2 1 2 4 2 @@ -2912,6 +2914,9 @@ def count(self, *, numeric_only: bool = False): """ Count non-NA cells for each column. + The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending + on `pandas.options.mode.use_inf_as_na`) are considered NA. + **Examples:** >>> import bigframes.pandas as bpd @@ -2921,7 +2926,7 @@ def count(self, *, numeric_only: bool = False): ... "B": [1, 2, 3, 4, 5], ... "C": [None, 3.5, None, 4.5, 5.0]}) >>> df - A B C + A B C 0 1.0 1 1 2 3.5 2 3.0 3 @@ -2938,9 +2943,6 @@ def count(self, *, numeric_only: bool = False): C 3.0 dtype: Float64 - The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending - on `pandas.options.mode.use_inf_as_na`) are considered NA. - Args: numeric_only (bool, default False): Include only `float`, `int` or `boolean` data. 
From 28f3eec79b968db7269e8f0869c8bb9114396cb3 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Tue, 28 Nov 2023 01:47:40 +0000 Subject: [PATCH 5/5] update . to : --- .../bigframes_vendored/pandas/core/frame.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 2b3d50891e..ba708e4f11 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2597,14 +2597,14 @@ def any(self, *, axis=0, bool_only: bool = False): [2 rows x 2 columns] - Checking if each column contains at least one True element(the default behavior without an explicit axis parameter). + Checking if each column contains at least one True element (the default behavior without an explicit axis parameter): >>> df.any() A True B False dtype: boolean - Checking if each row contains at least one True element. + Checking if each row contains at least one True element: >>> df.any(axis=1) 0 True @@ -2644,14 +2644,14 @@ def all(self, axis=0, *, bool_only: bool = False): [2 rows x 2 columns] - Checking if all values in each column are True(the default behavior without an explicit axis parameter). + Checking if all values in each column are True (the default behavior without an explicit axis parameter): >>> df.all() A True B False dtype: boolean - Checking across rows to see if all values are True. + Checking across rows to see if all values are True: >>> df.all(axis=1) 0 False @@ -2688,14 +2688,14 @@ def prod(self, axis=0, *, numeric_only: bool = False): [3 rows x 2 columns] - Calculating the product of each column(the default behavior without an explicit axis parameter). + Calculating the product of each column (the default behavior without an explicit axis parameter): >>> df.prod() A 6.0 B 160.875 dtype: Float64 - Calculating the product of each row.
+ Calculating the product of each row: >>> df.prod(axis=1) 0 4.5 @@ -2853,7 +2853,7 @@ def kurt(self, *, numeric_only: bool = False): [5 rows x 3 columns] - Calculating the kurtosis value of each column. + Calculating the kurtosis value of each column: >>> df.kurt() A -1.2 @@ -2893,7 +2893,7 @@ def std(self, *, numeric_only: bool = False): [5 rows x 3 columns] - Calculating the standard deviation of each column. + Calculating the standard deviation of each column: >>> df.std() A 1.581139 @@ -2935,7 +2935,7 @@ def count(self, *, numeric_only: bool = False): [5 rows x 3 columns] - Counting non-NA values for each column. + Counting non-NA values for each column: >>> df.count() A 4.0