From c43dcae9e71cc79354f1dac8444dfdeaa15b9645 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 28 Dec 2023 22:36:10 +0000 Subject: [PATCH 1/2] docs: code samples for `sample`, `get`, `Series.round` --- .../bigframes_vendored/pandas/core/frame.py | 78 ++++++++++++++- .../bigframes_vendored/pandas/core/generic.py | 94 +++++++++++++++++++ .../bigframes_vendored/pandas/core/series.py | 19 ++++ 3 files changed, 189 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index f2de8fcb6a..0872160abe 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1187,6 +1187,47 @@ def set_index( Set the DataFrame index (row labels) using one existing column. The index can replace the existing index. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'month': [1, 4, 7, 10], + ... 'year': [2012, 2014, 2013, 2014], + ... 'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 + + [4 rows x 3 columns] + + Set the index to become the 'month' column: + + >>> df.set_index('month') + year sale + month + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 + + [4 rows x 2 columns] + + Create a MultiIndex using columns 'year' and 'month': + + >>> df.set_index(['year', 'month']) + sale + year month + 2012 1 55 + 2014 4 40 + 2013 7 84 + 2014 10 31 + + [4 rows x 1 columns] + Args: keys: A label. This parameter can be a single column key. @@ -1621,6 +1662,39 @@ def items(self): Iterates over the DataFrame columns, returning a tuple with the column name and the content as a Series. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'species': ['bear', 'bear', 'marsupial'], + ... 'population': [1864, 22000, 80000]}, + ... 
index=['panda', 'polar', 'koala']) + >>> df + species population + panda bear 1864 + polar bear 22000 + koala marsupial 80000 + + [3 rows x 2 columns] + + >>> for label, content in df.items(): + ... print(f'--> label: {label}') + ... print(f'--> content:\\n{content}') + ... + --> label: species + --> content: + panda bear + polar bear + koala marsupial + Name: species, dtype: string + --> label: population + --> content: + panda 1864 + polar 22000 + koala 80000 + Name: population, dtype: Int64 + Returns: Iterator: Iterator of label, Series for each column. """ @@ -4587,7 +4661,7 @@ def index(self): ... 'Location': ['Seattle', 'New York', 'Kona']}, ... index=([10, 20, 30])) >>> df - Name Age Location + Name Age Location 10 Alice 25 Seattle 20 Bob 30 New York 30 Aritra 35 Kona @@ -4603,7 +4677,7 @@ def index(self): >>> df1 = df.set_index(["Name", "Location"]) >>> df1 - Age + Age Name Location Alice Seattle 25 Bob New York 30 diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 2885162fd6..e75379d4e5 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -254,6 +254,55 @@ def get(self, key, default=None): Returns default value if not found. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame( + ... [ + ... [24.3, 75.7, "high"], + ... [31, 87.8, "high"], + ... [22, 71.6, "medium"], + ... [35, 95, "medium"], + ... ], + ... columns=["temp_celsius", "temp_fahrenheit", "windspeed"], + ... index=["2014-02-12", "2014-02-13", "2014-02-14", "2014-02-15"], + ... 
) + >>> df + temp_celsius temp_fahrenheit windspeed + 2014-02-12 24.3 75.7 high + 2014-02-13 31.0 87.8 high + 2014-02-14 22.0 71.6 medium + 2014-02-15 35.0 95.0 medium + + [4 rows x 3 columns] + + >>> df.get(["temp_celsius", "windspeed"]) + temp_celsius windspeed + 2014-02-12 24.3 high + 2014-02-13 31.0 high + 2014-02-14 22.0 medium + 2014-02-15 35.0 medium + + [4 rows x 2 columns] + + >>> ser = df['windspeed'] + >>> ser + 2014-02-12 high + 2014-02-13 high + 2014-02-14 medium + 2014-02-15 medium + Name: windspeed, dtype: string + >>> ser.get('2014-02-13') + 'high' + + If the key is not found, the default value will be used. + + >>> df.get(["temp_celsius", "temp_kelvin"]) + >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value") + 'default_value' + Args: key: object @@ -410,6 +459,51 @@ def sample( You can use `random_state` for reproducibility. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'num_legs': [2, 4, 8, 0], + ... 'num_wings': [2, 0, 0, 0], + ... 'num_specimen_seen': [10, 2, 1, 8]}, + ... index=['falcon', 'dog', 'spider', 'fish']) + >>> df + num_legs num_wings num_specimen_seen + falcon 2 2 10 + dog 4 0 2 + spider 8 0 1 + fish 0 0 8 + + [4 rows x 3 columns] + + Fetch one random row from the DataFrame (note that we use `random_state` + to ensure reproducibility of the examples): + + >>> df.sample(random_state=1) + num_legs num_wings num_specimen_seen + dog 4 0 2 + + [1 rows x 3 columns] + + A random 50% sample of the DataFrame: + + >>> df.sample(frac=0.5, random_state=1) + num_legs num_wings num_specimen_seen + dog 4 0 2 + fish 0 0 8 + + [2 rows x 3 columns] + + Extract 3 random elements from the Series `df['num_legs']`: + + >>> s = df['num_legs'] + >>> s.sample(n=3, random_state=1) + dog 4 + fish 0 + spider 8 + Name: num_legs, dtype: Int64 + Args: n (Optional[int], default None): Number of items from axis to return. Cannot be used with `frac`. 
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index b0a4cb8193..2fe02c2964 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -693,6 +693,25 @@ def round(self, decimals: int = 0) -> Series: """ Round each value in a Series to the given number of decimals. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([0.1, 1.3, 2.7]) + >>> s.round() + 0 0.0 + 1 1.0 + 2 3.0 + dtype: Float64 + + >>> s = bpd.Series([0.123, 1.345, 2.789]) + >>> s.round(decimals=2) + 0 0.12 + 1 1.34 + 2 2.79 + dtype: Float64 + Args: decimals (int, default 0): Number of decimal places to round to. If decimals is negative, From 5db92c60cd517ff7fd9ff2ce4fbea9188b3f0c78 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 29 Dec 2023 00:29:49 +0000 Subject: [PATCH 2/2] one rephrasing --- third_party/bigframes_vendored/pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 0872160abe..9259d14bab 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1204,7 +1204,7 @@ def set_index( [4 rows x 3 columns] - Set the index to become the 'month' column: + Set the 'month' column to become the index: >>> df.set_index('month') year sale