From 765e673fc58a61285bbf09daaffec35d1211c6d6 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Tue, 19 Dec 2023 01:29:23 +0000 Subject: [PATCH 1/2] docs: code samples for `reset_index` and `sort_values` --- .../bigframes_vendored/pandas/core/frame.py | 161 ++++++++++++++++++ .../bigframes_vendored/pandas/core/series.py | 110 ++++++++++++ 2 files changed, 271 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index c082b87336..910c0620d1 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1047,6 +1047,93 @@ def reset_index( Reset the index of the DataFrame, and use the default one instead. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> import numpy as np + >>> df = bpd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) + >>> df + class max_speed + falcon bird 389.0 + parrot bird 24.0 + lion mammal 80.5 + monkey mammal <NA> + + [4 rows x 2 columns] + + When we reset the index, the old index is added as a column, and a new sequential index is used: + + >>> df.reset_index() + index class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal <NA> + + [4 rows x 3 columns] + + We can use the ``drop`` parameter to avoid the old index being added as a column: + + >>> df.reset_index(drop=True) + class max_speed + 0 bird 389.0 + 1 bird 24.0 + 2 mammal 80.5 + 3 mammal <NA> + + [4 rows x 2 columns] + + You can also use ``reset_index`` with ``MultiIndex``. + + >>> import pandas as pd + >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'), + ... ('bird', 'parrot'), + ... ('mammal', 'lion'), + ... ('mammal', 'monkey')], + ... 
names=['class', 'name']) + >>> columns = ['speed', 'max'] + >>> df = bpd.DataFrame([(389.0, 'fly'), + ... (24.0, 'fly'), + ... (80.5, 'run'), + ... (np.nan, 'jump')], + ... index=index, + ... columns=columns) + >>> df + speed max + class name + bird falcon 389.0 fly + parrot 24.0 fly + mammal lion 80.5 run + monkey <NA> jump + + [4 rows x 2 columns] + + >>> df.reset_index() + class name speed max + 0 bird falcon 389.0 fly + 1 bird parrot 24.0 fly + 2 mammal lion 80.5 run + 3 mammal monkey <NA> jump + + [4 rows x 4 columns] + + >>> df.reset_index(drop=True) + speed max + 0 389.0 fly + 1 24.0 fly + 2 80.5 run + 3 <NA> jump + + [4 rows x 2 columns] + + Args: drop (bool, default False): Do not try to insert index into dataframe columns. This resets @@ -1256,6 +1343,80 @@ def sort_values( ) -> DataFrame: """Sort by the values along row axis. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({ + ... 'col1': ['A', 'A', 'B', bpd.NA, 'D', 'C'], + ... 'col2': [2, 1, 9, 8, 7, 4], + ... 'col3': [0, 1, 9, 4, 2, 3], + ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] + ... 
}) + >>> df + col1 col2 col3 col4 + 0 A 2 0 a + 1 A 1 1 B + 2 B 9 9 c + 3 <NA> 8 4 D + 4 D 7 2 e + 5 C 4 3 F + + [6 rows x 4 columns] + + Sort by col1: + + >>> df.sort_values(by=['col1']) + col1 col2 col3 col4 + 0 A 2 0 a + 1 A 1 1 B + 2 B 9 9 c + 5 C 4 3 F + 4 D 7 2 e + 3 <NA> 8 4 D + + [6 rows x 4 columns] + + Sort by multiple columns: + + >>> df.sort_values(by=['col1', 'col2']) + col1 col2 col3 col4 + 1 A 1 1 B + 0 A 2 0 a + 2 B 9 9 c + 5 C 4 3 F + 4 D 7 2 e + 3 <NA> 8 4 D + + [6 rows x 4 columns] + + Sort in descending order: + + >>> df.sort_values(by='col1', ascending=False) + col1 col2 col3 col4 + 4 D 7 2 e + 5 C 4 3 F + 2 B 9 9 c + 0 A 2 0 a + 1 A 1 1 B + 3 <NA> 8 4 D + + [6 rows x 4 columns] + + Putting NAs first: + + >>> df.sort_values(by='col1', ascending=False, na_position='first') + col1 col2 col3 col4 + 3 <NA> 8 4 D + 4 D 7 2 e + 5 C 4 3 F + 2 B 9 9 c + 0 A 2 0 a + 1 A 1 1 B + + [6 rows x 4 columns] + Args: by (str or Sequence[str]): Name or list of names to sort by. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 8303df5ef4..6a5fe4adff 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -168,6 +168,53 @@ def reset_index( when the index is meaningless and needs to be reset to the default before another operation. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3, 4], name='foo', + ... index=['a', 'b', 'c', 'd']) + >>> s.index.name = "idx" + >>> s + idx + a 1 + b 2 + c 3 + d 4 + Name: foo, dtype: Int64 + + Generate a DataFrame with default index. + + >>> s.reset_index() + idx foo + 0 a 1 + 1 b 2 + 2 c 3 + 3 d 4 + + [4 rows x 2 columns] + + To specify the name of the new column, use the ``name`` param. 
+ + >>> s.reset_index(name="bar") + idx bar + 0 a 1 + 1 b 2 + 2 c 3 + 3 d 4 + + [4 rows x 2 columns] + + To generate a new Series with the default index, set param ``drop=True``. + + >>> s.reset_index(drop=True) + 0 1 + 1 2 + 2 3 + 3 4 + Name: foo, dtype: Int64 + Args: drop (bool, default False): Just reset the index, without inserting it as a column in @@ -699,6 +746,70 @@ def sort_values( Sort a Series in ascending or descending order by some criterion. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([np.nan, 1, 3, 10, 5]) + >>> s + 0 <NA> + 1 1.0 + 2 3.0 + 3 10.0 + 4 5.0 + dtype: Float64 + + Sort values in ascending order (default behaviour): + + >>> s.sort_values(ascending=True) + 1 1.0 + 2 3.0 + 4 5.0 + 3 10.0 + 0 <NA> + dtype: Float64 + + Sort values in descending order: + + >>> s.sort_values(ascending=False) + 3 10.0 + 4 5.0 + 2 3.0 + 1 1.0 + 0 <NA> + dtype: Float64 + + Sort values putting NAs first: + + >>> s.sort_values(na_position='first') + 0 <NA> + 1 1.0 + 2 3.0 + 4 5.0 + 3 10.0 + dtype: Float64 + + Sort a series of strings: + + >>> s = bpd.Series(['z', 'b', 'd', 'a', 'c']) + >>> s + 0 z + 1 b + 2 d + 3 a + 4 c + dtype: string + + >>> s.sort_values() + 3 a + 1 b + 4 c + 2 d + 0 z + dtype: string + Args: axis (0 or 'index'): Unused. Parameter needed for compatibility with DataFrame. 
From efd76fb8b34fddd4ab265c9ebbc95fc253976a87 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 21 Dec 2023 22:56:13 +0000 Subject: [PATCH 2/2] fix alignment in dataframe api code samples --- .../bigframes_vendored/pandas/core/frame.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 260680b5bb..6dc2397a76 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1054,11 +1054,11 @@ def reset_index( >>> import numpy as np >>> df = bpd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), - ... ('mammal', np.nan)], - ... index=['falcon', 'parrot', 'lion', 'monkey'], - ... columns=('class', 'max_speed')) + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) >>> df class max_speed falcon bird 389.0 @@ -1100,18 +1100,18 @@ class max_speed ... names=['class', 'name']) >>> columns = ['speed', 'max'] >>> df = bpd.DataFrame([(389.0, 'fly'), - ... (24.0, 'fly'), - ... (80.5, 'run'), - ... (np.nan, 'jump')], - ... index=index, - ... columns=columns) + ... (24.0, 'fly'), + ... (80.5, 'run'), + ... (np.nan, 'jump')], + ... index=index, + ... columns=columns) >>> df - speed max + speed max class name bird falcon 389.0 fly - parrot 24.0 fly + parrot 24.0 fly mammal lion 80.5 run - monkey <NA> jump + monkey <NA> jump [4 rows x 2 columns] @@ -1125,7 +1125,7 @@ class name speed max [4 rows x 4 columns] >>> df.reset_index(drop=True) - speed max + speed max 0 389.0 fly 1 24.0 fly 2 80.5 run @@ -1355,7 +1355,7 @@ def sort_values( ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] ... 
}) >>> df - col1 col2 col3 col4 + col1 col2 col3 col4 0 A 2 0 a 1 A 1 1 B 2 B 9 9 c @@ -1368,7 +1368,7 @@ def sort_values( Sort by col1: >>> df.sort_values(by=['col1']) - col1 col2 col3 col4 + col1 col2 col3 col4 0 A 2 0 a 1 A 1 1 B 2 B 9 9 c @@ -1381,7 +1381,7 @@ def sort_values( Sort by multiple columns: >>> df.sort_values(by=['col1', 'col2']) - col1 col2 col3 col4 + col1 col2 col3 col4 1 A 1 1 B 0 A 2 0 a 2 B 9 9 c @@ -1394,7 +1394,7 @@ def sort_values( Sort Descending: >>> df.sort_values(by='col1', ascending=False) - col1 col2 col3 col4 + col1 col2 col3 col4 4 D 7 2 e 5 C 4 3 F 2 B 9 9 c @@ -1407,7 +1407,7 @@ def sort_values( Putting NAs first: >>> df.sort_values(by='col1', ascending=False, na_position='first') - col1 col2 col3 col4 + col1 col2 col3 col4 3 <NA> 8 4 D 4 D 7 2 e 5 C 4 3 F