From 73bc841401f83727dd319d315c790eab45666607 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 30 Nov 2023 20:19:22 +0000 Subject: [PATCH 1/3] docs: add examples for dataframe.nunique, dataframe.diff, dataframe.agg, dataframe.describe --- .../bigframes_vendored/pandas/core/frame.py | 114 +++++++++++++++++- 1 file changed, 112 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 099d8b8e66..a3935faa4d 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -3323,7 +3323,26 @@ def melt(self, id_vars, value_vars, var_name, value_name): def nunique(self): """ - Count number of distinct elements in specified axis. + Count number of distinct elements in each column. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 2]}) + >>> df + A B + 0 3 1 + 1 1 2 + 2 2 2 + + [3 rows x 2 columns] + + >>> df.nunique() + A 3.0 + B 2.0 + dtype: Float64 Returns: bigframes.series.Series: Series with number of distinct elements. @@ -3379,6 +3398,40 @@ def diff( Calculates the difference of a DataFrame element compared with another element in the DataFrame (default is element in previous row). 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]}) + >>> df + A B + 0 3 1 + 1 1 2 + 2 2 3 + + [3 rows x 2 columns] + + Calculating difference with default periods=1: + + >>> df.diff() + A B + 0 <NA> <NA> + 1 -2 1 + 2 1 1 + + [3 rows x 2 columns] + + Calculating difference with periods=-1: + + >>> df.diff(periods=-1) + A B + 0 2 -1 + 1 -1 -1 + 2 <NA> <NA> + + [3 rows x 2 columns] + Args: periods (int, default 1): Periods to shift for calculating difference, accepts negative @@ -3391,7 +3444,37 @@ def diff( def agg(self, func): """ - Aggregate using one or more operations over the specified axis. + Aggregate using one or more operations over columns. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]}) + >>> df + A B + 0 3 1 + 1 1 2 + 2 2 3 + + [3 rows x 2 columns] + + Using a single function: + + >>> df.agg('sum') + A 6.0 + B 6.0 + dtype: Float64 + + Using a list of functions: + + >>> df.agg(['sum', 'mean']) + A B + sum 6.0 6.0 + mean 2.0 2.0 + + [2 rows x 2 columns] Args: func (function): @@ -3414,6 +3497,33 @@ def describe(self): Only supports numeric columns. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [0, 2, 8]}) + >>> df + A B + 0 3 0 + 1 1 2 + 2 2 8 + + [3 rows x 2 columns] + + >>> df.describe() + A B + count 3.0 3.0 + mean 2.0 3.333333 + std 1.0 4.163332 + min 1.0 0.0 + 25% 1.0 0.0 + 50% 2.0 2.0 + 75% 3.0 8.0 + max 3.0 8.0 + + [8 rows x 2 columns] + .. note:: Percentile values are approximates only. 
From b50dcb5705b9d4e325ea9fa2464e5f12ed9eed40 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 30 Nov 2023 20:53:15 +0000 Subject: [PATCH 2/3] update spacing --- .../bigframes_vendored/pandas/core/frame.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index a3935faa4d..216b4cbe56 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -3512,15 +3512,15 @@ def describe(self): [3 rows x 2 columns] >>> df.describe() - A B - count 3.0 3.0 - mean 2.0 3.333333 - std 1.0 4.163332 - min 1.0 0.0 - 25% 1.0 0.0 - 50% 2.0 2.0 - 75% 3.0 8.0 - max 3.0 8.0 + A B + count 3.0 3.0 + mean 2.0 3.333333 + std 1.0 4.163332 + min 1.0 0.0 + 25% 1.0 0.0 + 50% 2.0 2.0 + 75% 3.0 8.0 + max 3.0 8.0 [8 rows x 2 columns] From bb22ebd01ec63d57a81473c7d90180281c9a75bc Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 30 Nov 2023 20:54:20 +0000 Subject: [PATCH 3/3] update ordering --- .../bigframes_vendored/pandas/core/frame.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 216b4cbe56..bc39a28e9b 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -3497,6 +3497,16 @@ def describe(self): Only supports numeric columns. + .. note:: + Percentile values are approximates only. + + .. note:: + For numeric data, the result's index will include ``count``, + ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and + upper percentiles. By default the lower percentile is ``25`` and the + upper percentile is ``75``. The ``50`` percentile is the + same as the median. 
+ **Examples:** >>> import bigframes.pandas as bpd @@ -3524,16 +3534,6 @@ def describe(self): [8 rows x 2 columns] - .. note:: - Percentile values are approximates only. - - .. note:: - For numeric data, the result's index will include ``count``, - ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and - upper percentiles. By default the lower percentile is ``25`` and the - upper percentile is ``75``. The ``50`` percentile is the - same as the median. - Returns: bigframes.dataframe.DataFrame: Summary statistics of the Series or Dataframe provided. """