diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 6dfcc17f37..891c372a10 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -288,7 +288,7 @@ def bigframes_dtype_to_ibis_dtype( f""" Unexpected data type {bigframes_dtype}. The following str dtypes are supppted: 'boolean','Float64','Int64', 'string', - 'tring[pyarrow]','timestamp[us, tz=UTC][pyarrow]', + 'string[pyarrow]','timestamp[us, tz=UTC][pyarrow]', 'timestamp[us][pyarrow]','date32[day][pyarrow]', 'time64[us][pyarrow]'. The following pandas.ExtensionDtype are supported: pandas.BooleanDtype(), pandas.Float64Dtype(), diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 607243f844..ca5c6344ce 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -82,10 +82,46 @@ def astype(self, dtype): """ Cast a pandas object to a specified dtype ``dtype``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Create a DataFrame: + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> df = bpd.DataFrame(data=d) + >>> df.dtypes + col1 Int64 + col2 Int64 + dtype: object + + Cast all columns to ``Float64``: + + >>> df.astype('Float64').dtypes + col1 Float64 + col2 Float64 + dtype: object + + Create a series of type ``Int64``: + + >>> ser = bpd.Series([1, 2], dtype='Int64') + >>> ser + 0 1 + 1 2 + dtype: Int64 + + Convert to ``Float64`` type: + + >>> ser.astype('Float64') + 0 1.0 + 1 2.0 + dtype: Float64 + Args: dtype (str or pandas.ExtensionDtype): A dtype supported by BigQuery DataFrame include 'boolean','Float64','Int64', - 'string', 'tring[pyarrow]','timestamp[us, tz=UTC][pyarrow]', + 'string', 'string[pyarrow]','timestamp[us, tz=UTC][pyarrow]', 'timestamp[us][pyarrow]','date32[day][pyarrow]','time64[us][pyarrow]' A pandas.ExtensionDtype include pandas.BooleanDtype(), pandas.Float64Dtype(), pandas.Int64Dtype(), 
pandas.StringDtype(storage="pyarrow"), diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 8303df5ef4..d054684598 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -890,6 +890,95 @@ def groupby( used to group large amounts of data and compute operations on these groups. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + You can group by a named index level. + + >>> s = bpd.Series([380, 370., 24., 26.], + ... index=["Falcon", "Falcon", "Parrot", "Parrot"], + ... name="Max Speed") + >>> s.index.name="Animal" + >>> s + Animal + Falcon 380.0 + Falcon 370.0 + Parrot 24.0 + Parrot 26.0 + Name: Max Speed, dtype: Float64 + >>> s.groupby("Animal").mean() + Animal + Falcon 375.0 + Parrot 25.0 + Name: Max Speed, dtype: Float64 + + You can also group by more than one index level. + + >>> import pandas as pd + >>> s = bpd.Series([380, 370., 24., 26.], + ... index=pd.MultiIndex.from_tuples( + ... [("Falcon", "Clear"), + ... ("Falcon", "Cloudy"), + ... ("Parrot", "Clear"), + ... ("Parrot", "Clear")], + ... names=["Animal", "Sky"]), + ... name="Max Speed") + >>> s + Animal Sky + Falcon Clear 380.0 + Cloudy 370.0 + Parrot Clear 24.0 + Clear 26.0 + Name: Max Speed, dtype: Float64 + + >>> s.groupby("Animal").mean() + Animal + Falcon 375.0 + Parrot 25.0 + Name: Max Speed, dtype: Float64 + + >>> s.groupby("Sky").mean() + Sky + Clear 143.333333 + Cloudy 370.0 + Name: Max Speed, dtype: Float64 + + >>> s.groupby(["Animal", "Sky"]).mean() + Animal Sky + Falcon Clear 380.0 + Cloudy 370.0 + Parrot Clear 25.0 + Name: Max Speed, dtype: Float64 + + You can also group by values in a Series provided the index matches with + the original series. + + >>> df = bpd.DataFrame({'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'], + ... 'Max Speed': [380., 370., 24., 26.], + ... 
'Age': [10., 20., 4., 6.]}) + >>> df + Animal Max Speed Age + 0 Falcon 380.0 10.0 + 1 Falcon 370.0 20.0 + 2 Parrot 24.0 4.0 + 3 Parrot 26.0 6.0 + + [4 rows x 3 columns] + + >>> df['Max Speed'].groupby(df['Animal']).mean() + Animal + Falcon 375.0 + Parrot 25.0 + Name: Max Speed, dtype: Float64 + + >>> df['Age'].groupby(df['Animal']).max() + Animal + Falcon 20.0 + Parrot 6.0 + Name: Age, dtype: Float64 + Args: by (mapping, function, label, pd.Grouper or list of such, default None): Used to determine the groups for the groupby. @@ -1661,6 +1750,31 @@ def max( If you want the index of the maximum, use ``idxmax``. This is the equivalent of the ``numpy.ndarray`` method ``argmax``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Calculating the max of a Series: + + >>> s = bpd.Series([1, 3]) + >>> s + 0 1 + 1 3 + dtype: Int64 + >>> s.max() + 3 + + Calculating the max of a Series containing ``NA`` values: + + >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s + 0 1.0 + 1 3.0 + 2 <NA> + dtype: Float64 + >>> s.max() + 3.0 Returns: scalar: Scalar. @@ -1676,6 +1790,32 @@ def min( If you want the index of the minimum, use ``idxmin``. This is the equivalent of the ``numpy.ndarray`` method ``argmin``. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Calculating the min of a Series: + + >>> s = bpd.Series([1, 3]) + >>> s + 0 1 + 1 3 + dtype: Int64 + >>> s.min() + 1 + + Calculating the min of a Series containing ``NA`` values: + + >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s + 0 1.0 + 1 3.0 + 2 <NA> + dtype: Float64 + >>> s.min() + 1.0 + Returns: scalar: Scalar. """ @@ -1714,6 +1854,32 @@ def sum(self): This is equivalent to the method ``numpy.sum``. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Calculating the sum of a Series: + + >>> s = bpd.Series([1, 3]) + >>> s + 0 1 + 1 3 + dtype: Int64 + >>> s.sum() + 4 + + Calculating the sum of a Series containing ``NA`` values: + + >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s + 0 1.0 + 1 3.0 + 2 <NA> + dtype: Float64 + >>> s.sum() + 4.0 + Returns: scalar: Scalar. """ @@ -1722,6 +1888,32 @@ def sum(self): def mean(self): """Return the mean of the values over the requested axis. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Calculating the mean of a Series: + + >>> s = bpd.Series([1, 3]) + >>> s + 0 1 + 1 3 + dtype: Int64 + >>> s.mean() + 2.0 + + Calculating the mean of a Series containing ``NA`` values: + + >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s + 0 1.0 + 1 3.0 + 2 <NA> + dtype: Float64 + >>> s.mean() + 2.0 + Returns: scalar: Scalar. """