From 1d55be4b104f03ebaa3a7f7312e53a51c521f194 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 28 Dec 2023 07:30:28 +0000 Subject: [PATCH 1/2] docs: code samples for `DataFrame.rename`, `Series.rename` --- .../bigframes_vendored/pandas/core/frame.py | 24 ++++ .../bigframes_vendored/pandas/core/generic.py | 24 ++++ .../bigframes_vendored/pandas/core/series.py | 113 ++++++++++++++++++ 3 files changed, 161 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index d7ecae102b..be1ebdbfb8 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1022,6 +1022,30 @@ def rename( Dict values must be unique (1-to-1). Labels not contained in a dict will be left as-is. Extra labels listed don't throw an error. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df + A B + 0 1 4 + 1 2 5 + 2 3 6 + + [3 rows x 2 columns] + + Rename columns using a mapping: + + >>> df.rename(columns={"A": "col1", "B": "col2"}) + col1 col2 + 0 1 4 + 1 2 5 + 2 3 6 + + [3 rows x 2 columns] + Args: columns (Mapping): Dict-like from old column labels to new column labels. diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 2885162fd6..0c29b0d4d5 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -29,6 +29,30 @@ def ndim(self) -> int: def size(self) -> int: """Return an int representing the number of elements in this object. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s + a 1 + b 2 + c 3 + dtype: Int64 + >>> + >>> s.size + 3 + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + + [2 rows x 2 columns] + >>> df.size + 4 + Returns: int: Return the number of rows if Series. Otherwise return the number of rows times number of columns if DataFrame. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index b0a4cb8193..f181a7597f 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -135,6 +135,21 @@ def name(self) -> Hashable: to form a DataFrame. It is also used whenever displaying the Series using the interpreter. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3], dtype="Int64", name='Numbers') + >>> s + 0 1 + 1 2 + 2 3 + Name: Numbers, dtype: Int64 + + >>> s.name + 'Numbers' + Returns: hashable object: The name of the Series, also the column name if part of a DataFrame. @@ -545,6 +560,27 @@ def agg(self, func): """ Aggregate using one or more operations over the specified axis. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: Int64 + + >>> s.agg('min') + 1 + + >>> s.agg(['min', 'max']) + min 1.0 + max 4.0 + dtype: Float64 + Args: func (function): Function to use for aggregating the data. @@ -2244,6 +2280,29 @@ def std( Normalized by N-1 by default. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'person_id': [0, 1, 2, 3], + ... 'age': [21, 25, 62, 43], + ... 'height': [1.61, 1.87, 1.49, 2.01]} + ... 
+ ).set_index('person_id') + >>> df + age height + person_id + 0 21 1.61 + 1 25 1.87 + 2 62 1.49 + 3 43 2.01 + + [4 rows x 2 columns] + + >>> df.std() + age 18.786076 + height 0.237417 + dtype: Float64 Returns ------- @@ -2601,6 +2660,34 @@ def rename(self, index, **kwargs) -> Series | None: Alternatively, change ``Series.name`` with a scalar value. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: Int64 + + You can change the Series name by specifying a string scalar: + + >>> s.rename("my_name") + 0 1 + 1 2 + 2 3 + Name: my_name, dtype: Int64 + + You can change the labels by specifying a mapping: + + >>> s.rename({1: 3, 2: 5}) + 0 1 + 3 2 + 5 3 + dtype: Int64 + Args: index (scalar, hashable sequence, dict-like or function optional): Functions or dict-like are transformations to apply to @@ -2901,3 +2988,29 @@ def values(self): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def size(self) -> int: + """Return the number of elements in the underlying data. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + For Series: + + >>> s = bpd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s.size + 3 + + For Index: + + >>> idx = bpd.Index(bpd.Series([1, 2, 3])) + >>> idx.size + 3 + + Returns: + int: Return the number of elements in the underlying data. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From ec7f39a827dbe32fdf88e7101761b00a6e42a704 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 28 Dec 2023 20:23:26 +0000 Subject: [PATCH 2/2] improve the `size` code samples a bit --- .../bigframes_vendored/pandas/core/generic.py | 13 +------------ .../bigframes_vendored/pandas/core/series.py | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 0c29b0d4d5..4aa2ac5ac0 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -35,21 +35,10 @@ def size(self) -> int: >>> bpd.options.display.progress_bar = None >>> s = bpd.Series({'a': 1, 'b': 2, 'c': 3}) - >>> s - a 1 - b 2 - c 3 - dtype: Int64 - >>> >>> s.size 3 + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) - >>> df - col1 col2 - 0 1 3 - 1 2 4 - - [2 rows x 2 columns] >>> df.size 4 diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index f181a7597f..61ff2984d0 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -140,16 +140,30 @@ def name(self) -> Hashable: >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + For a Series: + >>> s = bpd.Series([1, 2, 3], dtype="Int64", name='Numbers') >>> s 0 1 1 2 2 3 Name: Numbers, dtype: Int64 - >>> s.name 'Numbers' + If the Series is part of a DataFrame: + + >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + + [2 rows x 2 columns] + >>> s = df["col1"] + >>> s.name + 'col1' + Returns: hashable object: The name of the Series, also the column name if part of a DataFrame.