From 17a36d8f1517e9a979e841a11a85bcb7d077ac70 Mon Sep 17 00:00:00 2001 From: Arwa Date: Tue, 7 Jan 2025 17:00:06 -0600 Subject: [PATCH 01/16] docs: update bigframes.pandas.Series docs --- .../bigframes_vendored/pandas/core/frame.py | 6 +- .../bigframes_vendored/pandas/core/series.py | 2092 ++++++++++++++--- 2 files changed, 1812 insertions(+), 286 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 053ed7b94c..988a055baa 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -6771,9 +6771,10 @@ def iat(self): **Examples:** >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], ... columns=['A', 'B', 'C']) - >>> bpd.options.display.progress_bar = None >>> df A B C 0 0 2 3 @@ -6804,9 +6805,10 @@ def at(self): **Examples:** >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], ... index=[4, 5, 6], columns=['A', 'B', 'C']) - >>> bpd.options.display.progress_bar = None >>> df A B C 4 0 2 3 diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 7c8f452a8f..a32b0525b0 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -685,7 +685,7 @@ def unique(self, keep_order=True) -> Series: Name: A, dtype: Int64 Returns: - Series: The unique values returned as a Series. + bigframes.pandas.Series: The unique values returned as a Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -697,8 +697,39 @@ def mode(self) -> Series: Always returns Series even if only one value is returned. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> bpd.Series([2, 1, 3, 3], name='A').unique() + 0 2 + 1 1 + 2 3 + Name: A, dtype: Int64 + + + >>> bpd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique() + 0 2016-01-01 00:00:00 + dtype: timestamp[us][pyarrow] + + A Categorical will return categories in the order of appearance and + with the same dtype. + + >>> bpd.Series(pd.Categorical(list('baabc'))).unique() + ['b', 'a', 'c'] + + >>> bpd.Series(pd.Categorical(list('baabc'), categories=list('abc'), + ... ordered=True)).unique() + 0 b + 1 a + 4 c + dtype: string + Returns: - bigframes.series.Series: Modes of the Series in sorted order. + bigframes.pandas.Series: + Modes of the Series in sorted order. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -710,6 +741,53 @@ def drop_duplicates( """ Return Series with duplicate values removed. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Generate a Series with duplicated entries. + + >>> s = bpd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', 'hippo'], + ... name='animal') + >>> s + 0 llama + 1 cow + 2 llama + 3 beetle + 4 llama + 5 hippo + Name: animal, dtype: string + + With the 'keep' parameter, the selection behaviour of duplicated values + can be changed. The value 'first' keeps the first occurrence for each set + of duplicated entries. The default value of keep is 'first'. + + >>> s.drop_duplicates() + 0 llama + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: string + + The value ‘last’ for parameter ‘keep’ keeps the last occurrence for + each set of duplicated entries. + + >>> s.drop_duplicates(keep='last') + 1 cow + 3 beetle + 4 llama + 5 hippo + Name: animal, dtype: string + + The value False for parameter ‘keep’ discards all sets of duplicated entries.
+ + >>> s.drop_duplicates(keep=False) + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: string + Args: keep ({'first', 'last', ``False``}, default 'first'): Method to handle dropping duplicates: @@ -719,7 +797,8 @@ def drop_duplicates( ``False`` : Drop all duplicates. Returns: - bigframes.series.Series: Series with duplicates dropped or None if ``inplace=True``. + bigframes.pandas.Series: + Series with duplicates dropped or None if ``inplace=True``. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -731,6 +810,54 @@ def duplicated(self, keep="first") -> Series: Series. Either all duplicates, all except the first or all except the last occurrence of duplicates can be indicated. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + By default, for each set of duplicated values, the first occurrence is + set on False and all others on True: + + >>> animals = bpd.Series(['llama', 'cow', 'llama', 'beetle', 'llama']) + >>> animals.duplicated() + 0 False + 1 False + 2 True + 3 False + 4 True + dtype: boolean + + which is equivalent to + + >>> animals.duplicated(keep='first') + 0 False + 1 False + 2 True + 3 False + 4 True + dtype: boolean + + By using ‘last’, the last occurrence of each set of duplicated values + is set on False and all others on True: + + >>> animals.duplicated(keep='last') + 0 True + 1 False + 2 True + 3 False + 4 False + dtype: boolean + + By setting keep on False, all duplicates are True: + + >>> animals.duplicated(keep=False) + 0 True + 1 False + 2 True + 3 False + 4 True + dtype: boolean + Args: keep ({'first', 'last', False}, default 'first'): Method to handle dropping duplicates: @@ -742,7 +869,8 @@ def duplicated(self, keep="first") -> Series: ``False`` : Mark all duplicates as ``True``. 
Returns: - bigframes.series.Series: Series indicating whether each value has occurred in the + bigframes.pandas.Series: + Series indicating whether each value has occurred in the preceding values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -754,6 +882,23 @@ def idxmin(self) -> Hashable: If multiple values equal the minimum, the first row label with that value is returned. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(data=[1, None, 4, 1], + ... index=['A', 'B', 'C', 'D']) + >>> s + A 1.0 + B + C 4.0 + D 1.0 + dtype: Float64 + + >>> s.idxmin() + 'A' + Returns: Index: Label of the minimum value. """ @@ -766,6 +911,24 @@ def idxmax(self) -> Hashable: If multiple values equal the maximum, the first row label with that value is returned. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(data=[1, None, 4, 3, 4], + ... index=['A', 'B', 'C', 'D', 'E']) + >>> s + A 1.0 + B + C 4.0 + D 3.0 + E 4.0 + dtype: Float64 + + >>> s.idxmax() + 'C' + Returns: Index: Label of the maximum value. """ @@ -800,7 +963,8 @@ def round(self, decimals: int = 0) -> Series: it specifies the number of positions to the left of the decimal point. Returns: - bigframes.series.Series: Rounded values of the Series. + bigframes.pandas.Series: + Rounded values of the Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -814,6 +978,12 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series: >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([[1, 2, 3], [], [3, 4]]) + >>> s + 0 [1 2 3] + 1 [] + 2 [3 4] + dtype: list[pyarrow] + >>> s.explode() 0 1 0 2 @@ -828,7 +998,8 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series: If True, the resulting index will be labeled 0, 1, …, n - 1. 
Returns: - bigframes.series.Series: Exploded lists to rows; index will be duplicated for these rows. + bigframes.pandas.Series: + Exploded lists to rows; index will be duplicated for these rows. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -865,7 +1036,8 @@ def corr(self, other, method="pearson", min_periods=None) -> float: are not yet supported, so a result will be returned for at least two observations. Returns: - float: Will return NaN if there are fewer than two numeric pairs, either series has a + float: + Will return NaN if there are fewer than two numeric pairs, either series has a variance or covariance of zero, or any input value is infinite. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -888,7 +1060,7 @@ def autocorr(self, lag: int = 1) -> float: >>> s.autocorr(lag=2) np.float64(-1.0) - If the Pearson correlation is not well defined, then 'NaN' is returned. + If the Pearson correlation is not well defined, then 'NaN' is returned. >>> s = bpd.Series([1, 0, 0, 0]) >>> s.autocorr() @@ -899,7 +1071,8 @@ def autocorr(self, lag: int = 1) -> float: Number of lags to apply before performing autocorrelation. Returns: - float: The Pearson correlation between self and self.shift(lag). + float: + The Pearson correlation between self and self.shift(lag). """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -913,6 +1086,16 @@ def cov( The two `Series` objects are not required to be the same length and will be aligned internally before the covariance is calculated. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s1 = bpd.Series([0.90010907, 0.13484424, 0.62036035]) + >>> s2 = bpd.Series([0.12528585, 0.26962463, 0.51111198]) + >>> s1.cov(s2) + np.float64(-0.01685762652715874) + Args: other (Series): Series with which to compute the covariance. @@ -928,8 +1111,49 @@ def diff(self) -> Series: """ First discrete difference of element. 
- Calculates the difference of a {klass} element compared with another - element in the {klass} (default is element in previous row). + Calculates the difference of a Series element compared with another + element in the Series (default is element in previous row). + + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Difference with previous row + + >>> s = bpd.Series([1, 1, 2, 3, 5, 8]) + >>> s.diff() + 0 + 1 0 + 2 1 + 3 1 + 4 2 + 5 3 + dtype: Int64 + + Difference with 3rd previous row + + >>> s.diff(periods=3) + 0 + 1 + 2 + 3 2 + 4 4 + 5 6 + dtype: Int64 + + + Difference with following row + + >>> s.diff(periods=-1) + 0 0 + 1 -1 + 2 -1 + 3 -2 + 4 -3 + 5 + dtype: Int64 Args: periods (int, default 1): @@ -937,7 +1161,8 @@ def diff(self) -> Series: values. Returns: - Series: First differences of the Series. + bigframes.pandas.Series: + First differences of the Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -977,7 +1202,8 @@ def dot(self, other) -> Series | np.ndarray: The other object to compute the dot product with its columns. Returns: - scalar, Series or numpy.ndarray: Return the dot product of the Series + scalar, bigframes.pandas.Series or numpy.ndarray: + Return the dot product of the Series and other if other is a Series, the Series of the dot product of Series and each rows of other if other is a DataFrame or a numpy.ndarray between the Series and each columns of the numpy array. @@ -1015,6 +1241,7 @@ def sort_values( **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([np.nan, 1, 3, 10, 5]) @@ -1081,15 +1308,16 @@ def sort_values( ascending (bool or list of bools, default True): If True, sort values in ascending order, otherwise descending. kind (str, default to 'quicksort'): - Choice of sorting algorithm. Accepts 'quicksort’, ‘mergesort’, - ‘heapsort’, ‘stable’.
Ignored except when determining whether to + Choice of sorting algorithm. Accepts 'quicksort', 'mergesort', + 'heapsort', 'stable'. Ignored except when determining whether to sort stably. 'mergesort' or 'stable' will result in stable reorder na_position ({'first' or 'last'}, default 'last'): Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. Returns: - bigframes.series.Series: Series ordered by values or None if ``inplace=True``. + bigframes.pandas.Series or None: + Series ordered by values or None if ``inplace=True``. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1106,6 +1334,71 @@ def sort_index( Returns a new Series sorted by label if `inplace` argument is ``False``, otherwise updates the original series and returns None. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4]) + >>> s.sort_index() + 1 c + 2 b + 3 a + 4 d + dtype: string + + Sort Descending + + >>> s.sort_index(ascending=False) + 4 d + 3 a + 2 b + 1 c + dtype: string + + By default NaNs are put at the end, but use na_position to place them at + the beginning + + >>> s = bpd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan]) + >>> s.sort_index(na_position='first') + d + 1.0 c + 2.0 b + 3.0 a + dtype: string + + Specify index level to sort + + >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', + ... 'baz', 'baz', 'bar', 'bar']), + ... np.array(['two', 'one', 'two', 'one', + ... 'two', 'one', 'two', 'one'])] + >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) + >>> s.sort_index(level=1) + bar one 8 + baz one 6 + foo one 4 + qux one 2 + bar two 7 + baz two 5 + foo two 3 + qux two 1 + dtype: int64 + + Does not sort by remaining levels when sorting by levels + + >>> s.sort_index(level=1, sort_remaining=False) + qux one 2 + foo one 4 + baz one 6 + bar one 8 + qux two 1 + foo two 3 + baz two 5 + bar two 7 + dtype: int64 +
Args: axis ({0 or 'index'}): Unused. Parameter needed for compatibility with DataFrame. @@ -1117,7 +1410,8 @@ def sort_index( Not implemented for MultiIndex. Returns: - bigframes.series.Series: The original Series sorted by the labels or None if + bigframes.pandas.Series or None: + The original Series sorted by the labels or None if ``inplace=True``. """ @@ -1130,6 +1424,69 @@ def nlargest( """ Return the largest `n` elements. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} + >>> s = bpd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: Int64 + + The n largest elements where `n=5` by default. + + >>> s.nlargest() + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: Int64 + + The n largest elements where `n=3`. Default keep value is `first` so Malta + will be kept. + + >>> s.nlargest(3) + France 65000000 + Italy 59000000 + Malta 434000 + dtype: Int64 + + The n largest elements where `n=3` and keeping the last duplicates. Brunei + will be kept since it is the last with value 434000 based on the index order. + + >>> s.nlargest(3, keep='last') + France 65000000 + Italy 59000000 + Brunei 434000 + dtype: Int64 + + The n largest elements where `n=3` with all duplicates kept. Note that the + returned Series has five elements due to the three duplicates. + + >>> s.nlargest(3, keep='all') + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: Int64 + Args: n (int, default 5): Return this many descending sorted values.
@@ -1144,7 +1501,8 @@ def nlargest( size larger than `n`. Returns: - bigframes.series.Series: The `n` largest values in the Series, sorted in decreasing order. + bigframes.pandas.Series: + The `n` largest values in the Series, sorted in decreasing order. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1152,6 +1510,59 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: """ Return the smallest `n` elements. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Brunei": 434000, "Malta": 434000, + ... "Maldives": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} + >>> s = bpd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Brunei 434000 + Malta 434000 + Maldives 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: Int64 + + The n smallest elements where `n=5` by default. + + >>> s.nsmallest() + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Iceland 337000 + dtype: Int64 + + The n smallest elements where `n=3`. Default keep value is `first` so + Nauru and Tuvalu will be kept. + + >>> s.nsmallest(3) + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + dtype: Int64 + + The n smallest elements where `n=3` with all duplicates kept. Note that + the returned Series has four elements due to the three duplicates. + + >>> s.nsmallest(3, keep='all') + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + dtype: Int64 + Args: n (int, default 5): Return this many ascending sorted values. @@ -1167,7 +1578,8 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: size larger than `n`. Returns: - bigframes.series.Series: The `n` smallest values in the Series, sorted in increasing order.
+ bigframes.pandas.Series: + The `n` smallest values in the Series, sorted in increasing order. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1285,9 +1697,10 @@ def apply( the func will be passed the whole Series at once. Returns: - bigframes.series.Series: A new Series with values representing the - return value of the ``func`` applied to each element of the original - Series. + bigframes.pandas.Series: + A new Series with values representing the + return value of the ``func`` applied to each element of the + original Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1310,8 +1723,8 @@ def combine( >>> import numpy as np >>> bpd.options.display.progress_bar = None - Consider 2 Datasets ``s1`` and ``s2`` containing - highest clocked speeds of different birds. + Consider 2 Datasets ``s1`` and ``s2`` containing + highest clocked speeds of different birds. >>> s1 = bpd.Series({'falcon': 330.0, 'eagle': 160.0}) >>> s1 @@ -1325,8 +1738,8 @@ def combine( duck 30.0 dtype: Float64 - Now, to combine the two datasets and view the highest speeds - of the birds across the two datasets + Now, to combine the two datasets and view the highest speeds + of the birds across the two datasets >>> s1.combine(s2, np.maximum) falcon 345.0 @@ -1343,7 +1756,8 @@ def combine( Also accepts some numpy binary functions. Returns: - Series: The result of combining the Series with the other object. + bigframes.pandas.Series: + The result of combining the Series with the other object. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1485,7 +1899,8 @@ def groupby( If False, NA values will also be treated as the key in groups. Returns: - bigframes.core.groupby.SeriesGroupBy: Returns a groupby object that contains + bigframes.core.groupby.SeriesGroupBy: + Returns a groupby object that contains information about the groups. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1598,11 +2013,13 @@ def drop( For MultiIndex, level for which the labels will be removed. Returns: - bigframes.series.Series: Series with specified index labels removed + bigframes.pandas.Series or None: + Series with specified index labels removed or None if ``inplace=True``. Raises: - KeyError: If none of the labels are found in the index. + KeyError: + If none of the labels are found in the index. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1636,7 +2053,8 @@ def swaplevel(self, i, j): Levels of the indices to be swapped. Can pass level name as string. Returns: - Series: Series with levels swapped in MultiIndex + bigframes.pandas.Series: + Series with levels swapped in MultiIndex """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1654,7 +2072,8 @@ def droplevel(self, level, axis): For `Series` this parameter is unused and defaults to 0. Returns: - Series with requested index / column level(s) removed. + bigframes.pandas.Series: + Series with requested index / column level(s) removed. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1665,33 +2084,25 @@ def interpolate(self, method: str = "linear"): **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({ - ... 'A': [1, 2, 3, None, None, 6], - ... 'B': [None, 6, None, 2, None, 3], - ... }, index=[0, 0.1, 0.3, 0.7, 0.9, 1.0]) - >>> df.interpolate() - A B - 0.0 1.0 - 0.1 2.0 6.0 - 0.3 3.0 4.0 - 0.7 4.0 2.0 - 0.9 5.0 2.5 - 1.0 6.0 3.0 - - [6 rows x 2 columns] - >>> df.interpolate(method="values") - A B - 0.0 1.0 - 0.1 2.0 6.0 - 0.3 3.0 4.666667 - 0.7 4.714286 2.0 - 0.9 5.571429 2.666667 - 1.0 6.0 3.0 - - [6 rows x 2 columns] + Filling in NaN in a Series via linear interpolation. 
+ + >>> s = bpd.Series([0, 1, np.nan, 3]) + >>> s + 0 0.0 + 1 1.0 + 2 + 3 3.0 + dtype: Float64 + >>> s.interpolate() + 0 0.0 + 1 1.0 + 2 2.0 + 3 3.0 + dtype: Float64 Args: method (str, default 'linear'): @@ -1702,7 +2113,7 @@ def interpolate(self, method: str = "linear"): 'pad': Fill in NaNs using existing values. 'nearest', 'zero', 'slinear': Emulates `scipy.interpolate.interp1d` Returns: - Series: + bigframes.pandas.Series: Returns the same object type as the caller, interpolated at some or all ``NaN`` values """ @@ -1718,6 +2129,7 @@ def fillna( **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([np.nan, 2, np.nan, -1]) @@ -1752,7 +2164,8 @@ def fillna( Value to use to fill holes (e.g. 0). Returns: - Series or None: Object with missing values filled or None. + bigframes.pandas.Series or None: + Object with missing values filled or None. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1874,7 +2287,8 @@ def replace( string. Returns: - Series/DataFrame: Object after replacement. + bigframes.pandas.Series/bigframes.pandas.DataFrame: + Object after replacement. Raises: TypeError: @@ -1897,6 +2311,7 @@ def dropna(self, *, axis=0, inplace: bool = False, how=None) -> Series: **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None Drop NA values from a Series: @@ -1939,7 +2354,8 @@ def dropna(self, *, axis=0, inplace: bool = False, how=None) -> Series: Not in use. Kept for compatibility. Returns: - Series: Series with NA entries dropped from it. + bigframes.pandas.Series: + Series with NA entries dropped from it.
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -1959,6 +2375,7 @@ def between( **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None Boundary values are included by default: @@ -2001,8 +2418,9 @@ def between( Include boundaries. Whether to set each bound as closed or open. Returns: - Series: Series representing whether each element is between left and - right (inclusive). + bigframes.pandas.Series: + Series representing whether each element is between left and + right (inclusive). """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2016,6 +2434,7 @@ def case_when( **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None >>> c = bpd.Series([6, 7, 8, 9], name="c") @@ -2036,11 +2455,10 @@ def case_when( **See also:** - - :func:`bigframes.series.Series.mask` : Replace values where the condition is True. + - :func:`bigframes.pandas.Series.mask` : Replace values where the condition is True. Args: - caselist: - A list of tuples of conditions and expected replacements + caselist (A list of tuples of conditions and expected replacements): Takes the form: ``(condition0, replacement0)``, ``(condition1, replacement1)``, ... . ``condition`` should be a 1-D boolean array-like object @@ -2056,7 +2474,7 @@ def case_when( (though pandas doesn`t check it). Returns: - bigframes.series.Series + bigframes.pandas.Series """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2070,6 +2488,7 @@ def cumprod(self): **Examples:** >>> import bigframes.pandas as bpd + >>> import numpy as np >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([2, np.nan, 5, -1, 0]) @@ -2092,7 +2511,8 @@ def cumprod(self): dtype: Float64 Returns: - bigframes.series.Series: Return cumulative sum of scalar or Series. + bigframes.pandas.Series: + Return cumulative product of scalar or Series.
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2103,13 +2523,39 @@ def cumsum(self): Returns a DataFrame or Series of the same size containing the cumulative sum. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: Float64 + + By default, NA values are ignored. + + >>> s.cumsum() + 0 2.0 + 1 + 2 7.0 + 3 6.0 + 4 6.0 + dtype: Float64 + Args: axis ({0 or 'index', 1 or 'columns'}, default 0): The index or the name of the axis. 0 is equivalent to None or 'index'. For `Series` this parameter is unused and defaults to 0. Returns: - scalar or Series: Return cumulative sum of scalar or Series. + bigframes.pandas.Series: + Return cumulative sum of scalar or Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2120,13 +2566,35 @@ def cummax(self): Returns a DataFrame or Series of the same size containing the cumulative maximum. - Args: - axis ({{0 or 'index', 1 or 'columns'}}, default 0): - The index or the name of the axis. 0 is equivalent to None or 'index'. - For `Series` this parameter is unused and defaults to 0. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: Float64 + + By default, NA values are ignored. + + >>> s.cummax() + 0 2.0 + 1 + 2 5.0 + 3 5.0 + 4 5.0 + dtype: Float64 + Returns: - bigframes.series.Series: Return cumulative maximum of scalar or Series. + bigframes.pandas.Series: + Return cumulative maximum of scalar or Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2137,30 +2605,75 @@ def cummin(self): Returns a DataFrame or Series of the same size containing the cumulative minimum. 
- Args: - axis ({0 or 'index', 1 or 'columns'}, default 0): - The index or the name of the axis. 0 is equivalent to None or 'index'. - For `Series` this parameter is unused and defaults to 0. - skipna (bool, default True): - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. - `*args`, `**kwargs`: - Additional keywords have no effect but might be accepted for - compatibility with NumPy. + **Examples:** - Returns: - bigframes.series.Series: Return cumulative minimum of scalar or Series. - """ - raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: Float64 + + By default, NA values are ignored. + + >>> s.cummin() + 0 2.0 + 1 + 2 2.0 + 3 -1.0 + 4 -1.0 + dtype: Float64 + + Returns: + bigframes.pandas.Series: + Return cumulative minimum of scalar or Series. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def eq(self, other) -> Series: """Return equal of Series and other, element-wise (binary operator eq). - Equivalent to ``other == series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``other == series``, but with support to substitute a + fill_value for missing data in either one of the inputs.
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.eq(b) + a True + b + c + d + e + dtype: boolean Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: Series: The result of the operation. @@ -2171,83 +2684,241 @@ def eq(self, other) -> Series: def ne(self, other) -> Series: """Return not equal of Series and other, element-wise (binary operator ne). - Equivalent to ``other != series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``other != series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.ne(b) + a False + b + c + d + e + dtype: boolean Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def le(self, other) -> Series: - """Get 'less than or equal to' of Series and other, element-wise (binary operator `<=`). + """Get 'less than or equal to' of Series and other, element-wise (binary + operator le). 
- Equivalent to ``series <= other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``series <= other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.le(b) + a True + b + c + d + e + dtype: boolean Args: other: Series, or scalar value Returns: - bigframes.series.Series: The result of the comparison. + bigframes.pandas.Series: + The result of the comparison. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def lt(self, other) -> Series: - """Get 'less than' of Series and other, element-wise (binary operator `<`). + """Get 'less than' of Series and other, element-wise (binary operator lt). - Equivalent to ``series < other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``series < other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.lt(b) + a False + b + c + d + e + dtype: boolean Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation.
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def ge(self, other) -> Series: - """Get 'greater than or equal to' of Series and other, element-wise (binary operator `>=`). + """Get 'greater than or equal to' of Series and other, element-wise + (binary operator ge). - Equivalent to ``series >= other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``series >= other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.ge(b) + a True + b + c + d + e + dtype: boolean Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def gt(self, other) -> Series: - """Get 'less than or equal to' of Series and other, element-wise (binary operator `<=`). + """Get 'greater than' of Series and other, element-wise + (binary operator gt). + + Equivalent to ``series > other``, but with support to substitute a + fill_value for missing data in either one of the inputs.
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``series <= other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a.gt(b) + a False + b + c + d + e + dtype: boolean Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def add(self, other) -> Series: - """Return addition of Series and other, element-wise (binary operator add). + """Return addition of Series and other, element-wise (binary operator + add). - Equivalent to ``series + other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``series + other``, but with support to substitute a + fill_value for missing data in either one of the inputs. **Examples:** @@ -2299,10 +2970,11 @@ def add(self, other) -> Series: dtype: Int64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2352,21 +3024,54 @@ def __add__(self, other): Object to be added to the Series. Returns: - Series: The result of adding `other` to Series. + bigframes.pandas.Series: + The result of adding `other` to Series. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def radd(self, other) -> Series: - """Return addition of Series and other, element-wise (binary operator radd). + """Return addition of Series and other, element-wise (binary operator + radd). + + Equivalent to ``other + series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** - Equivalent to ``other + series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.add(b) + a 2.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series, or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2381,7 +3086,8 @@ def __radd__(self, other): Object to which Series should be added. Returns: - Series: The result of adding Series to `other`. + bigframes.pandas.Series: + The result of adding Series to `other`. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2389,16 +3095,48 @@ def sub( self, other, ) -> Series: - """Return subtraction of Series and other, element-wise (binary operator sub). + """Return subtraction of Series and other, element-wise (binary operator + sub). + + Equivalent to ``series - other``, but with support to substitute a + fill_value for missing data in either one of the inputs. 
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None - Equivalent to ``series - other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.subtract(b) + a 0.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series, or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2448,21 +3186,54 @@ def __sub__(self, other): Object to subtract from the Series. Returns: - Series: The result of subtraction. + bigframes.pandas.Series: + The result of subtraction. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rsub(self, other) -> Series: - """Return subtraction of Series and other, element-wise (binary operator rsub). + """Return subtraction of Series and other, element-wise (binary operator + rsub). + + Equivalent to ``other - series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None - Equivalent to ``other - series``, but with support to substitute a fill_value for - missing data in either one of the inputs. 
+ >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.subtract(b) + a 0.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series, or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2477,21 +3248,54 @@ def __rsub__(self, other): Object to subtract the Series from. Returns: - Series: The result of subtraction. + bigframes.pandas.Series: + The result of subtraction. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def mul(self, other) -> Series: - """Return multiplication of Series and other, element-wise (binary operator mul). + """Return multiplication of Series and other, element-wise (binary + operator mul). - Equivalent to ``other * series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``series * other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.multiply(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation.
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2530,21 +3334,54 @@ def __mul__(self, other): Object to multiply with the Series. Returns: - Series: The result of the multiplication. + bigframes.pandas.Series: + The result of the multiplication. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rmul(self, other) -> Series: - """Return multiplication of Series and other, element-wise (binary operator mul). + """Return multiplication of Series and other, element-wise (binary + operator rmul). + + Equivalent to ``other * series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None - Equivalent to ``series * others``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.multiply(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2559,21 +3396,53 @@ def __rmul__(self, other): Object to multiply the Series with. Returns: - Series: The result of the multiplication. + bigframes.pandas.Series: The result of the multiplication. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def truediv(self, other) -> Series: - """Return floating division of Series and other, element-wise (binary operator truediv). + """Return floating division of Series and other, element-wise (binary + operator truediv).
+ + Equivalent to ``series / other``, but with support to substitute a + fill_value for missing data in either one of the inputs. - Equivalent to ``series / other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.divide(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2612,21 +3481,53 @@ def __truediv__(self, other): Object to divide the Series by. Returns: - Series: The result of the division. + bigframes.pandas.Series: + The result of the division. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rtruediv(self, other) -> Series: - """Return floating division of Series and other, element-wise (binary operator rtruediv). + """Return floating division of Series and other, element-wise (binary + operator rtruediv). + + Equivalent to ``other / series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 - Equivalent to ``other / series``, but with support to substitute a fill_value for - missing data in either one of the inputs. 
+ >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + >>> a.divide(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2642,21 +3543,53 @@ def __rtruediv__(self, other): Object to divide by the Series. Returns: - Series: The result of the division. + bigframes.pandas.Series: The result of the division. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def floordiv(self, other) -> Series: - """Return integer division of Series and other, element-wise (binary operator floordiv). + """Return integer division of Series and other, element-wise + (binary operator floordiv). - Equivalent to ``series // other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``series // other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.floordiv(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2695,21 +3628,54 @@ def __floordiv__(self, other): Object to divide the Series by. 
Returns: - Series: The result of the integer divison. + bigframes.pandas.Series: + The result of the integer division. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rfloordiv(self, other) -> Series: - """Return integer division of Series and other, element-wise (binary operator rfloordiv). + """Return integer division of Series and other, element-wise (binary + operator rfloordiv). + + Equivalent to ``other // series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 - Equivalent to ``other // series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> a.floordiv(b) + a 1.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2725,21 +3691,53 @@ def __rfloordiv__(self, other): Object to divide by the Series. Returns: - Series: The result of the integer division. + bigframes.pandas.Series: + The result of the integer division. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def mod(self, other) -> Series: """Return modulo of Series and other, element-wise (binary operator mod). - Equivalent to ``series % other``, but with support to substitute a fill_value for - missing data in either one of the inputs. 
+ Equivalent to ``series % other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.mod(b) + a 0.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2778,21 +3776,53 @@ def __mod__(self, other): Object to modulo the Series by. Returns: - Series: The result of the modulo. + bigframes.pandas.Series: + The result of the modulo. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rmod(self, other) -> Series: """Return modulo of Series and other, element-wise (binary operator mod). - Equivalent to ``series % other``, but with support to substitute a fill_value for - missing data in either one of the inputs. + Equivalent to ``other % series``, but with support to substitute a + fill_value for missing data in either one of the inputs.
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.mod(b) + a 0.0 + b + c + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2808,21 +3838,54 @@ def __rmod__(self, other): Object to modulo by the Series. Returns: - Series: The result of the modulo. + bigframes.pandas.Series: + The result of the modulo. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def pow(self, other) -> Series: - """Return Exponential power of series and other, element-wise (binary operator `pow`). + """Return Exponential power of series and other, element-wise (binary + operator `pow`). + + Equivalent to ``series ** other``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 - Equivalent to ``series ** other``, but with support to substitute a fill_value for - missing data in either one of the inputs. 
+ >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.pow(b) + a 1.0 + b 1.0 + c 1.0 + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2862,21 +3925,54 @@ def __pow__(self, other): Object to exponentiate the Series with. Returns: - Series: The result of the exponentiation. + bigframes.pandas.Series: + The result of the exponentiation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rpow(self, other) -> Series: - """Return Exponential power of series and other, element-wise (binary operator `rpow`). + """Return Exponential power of series and other, element-wise (binary + operator `rpow`). + + Equivalent to ``other ** series``, but with support to substitute a + fill_value for missing data in either one of the inputs. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 - Equivalent to ``other ** series``, but with support to substitute a fill_value for - missing data in either one of the inputs. + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.pow(b) + a 1.0 + b 1.0 + c 1.0 + d + e + dtype: Float64 Args: - other (Series, or scalar value): + other (Series or scalar value) Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2893,21 +3989,60 @@ def __rpow__(self, other): Object to exponentiate with the Series. Returns: - Series: The result of the exponentiation. + bigframes.pandas.Series: + The result of the exponentiation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def divmod(self, other) -> Series: - """Return integer division and modulo of Series and other, element-wise (binary operator divmod). + """Return integer division and modulo of Series and other, element-wise + (binary operator divmod). Equivalent to divmod(series, other). + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.divmod(b) + (a 1.0 + b + c + d + e + dtype: Float64, + a 0.0 + b + c + d + e + dtype: Float64) + Args: other: Series, or scalar value Returns: - 2-Tuple of Series: The result of the operation. The result is always - consistent with (floordiv, mod) (though pandas may not). + Tuple[bigframes.pandas.Series, bigframes.pandas.Series]: + The result of the operation. The result is always + consistent with (floordiv, mod) (though pandas may not). """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2917,12 +4052,49 @@ def rdivmod(self, other) -> Series: Equivalent to other divmod series. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) + >>> a + a 1.0 + b 1.0 + c 1.0 + d + dtype: Float64 + + >>> b = bpd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) + >>> b + a 1.0 + b + d 1.0 + e + dtype: Float64 + + >>> a.divmod(b) + (a 1.0 + b + c + d + e + dtype: Float64, + a 0.0 + b + c + d + e + dtype: Float64) + Args: other: Series, or scalar value Returns: - 2-Tuple of Series: The result of the operation. The result is always - consistent with (rfloordiv, rmod) (though pandas may not). + Tuple[bigframes.pandas.Series, bigframes.pandas.Series]: + The result of the operation. The result is always + consistent with (rfloordiv, rmod) (though pandas may not). """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2965,7 +4137,8 @@ def combine_first(self, other) -> Series: The value(s) to be used for filling null values. Returns: - Series: The result of combining the provided Series with the other object. + bigframes.pandas.Series: + The result of combining the provided Series with the other object. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3039,6 +4212,9 @@ def update(self, other) -> None: Args: other (Series, or object coercible into Series) + + Returns: + None """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3052,7 +4228,8 @@ def all( DataFrame axis that is False or equivalent (e.g. zero or empty). Returns: - scalar or Series: If level is specified, then, Series is returned; + scalar or bigframes.pandas.Series: + If level is specified, then, Series is returned; otherwise, scalar is returned. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3066,8 +4243,26 @@ def any( Returns False unless there is at least one element within a series or along a Dataframe axis that is True or equivalent (e.g. 
non-zero or non-empty). + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + + For Series input, the output is a scalar indicating whether any element is True. + + >>> bpd.Series([False, False]).any() + False + >>> bpd.Series([True, False]).any() + True + >>> bpd.Series([], dtype="float64").any() + False + >>> bpd.Series([np.nan]).any() + False + Returns: - scalar or Series: If level is specified, then, Series is returned; + scalar or bigframes.pandas.Series: + If level is specified, then, Series is returned; otherwise, scalar is returned. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3184,9 +4379,8 @@ def std( height 0.237417 dtype: Float64 - Returns - ------- - scalar or Series (if level specified) + Returns: + scalar or bigframes.pandas.Series (if level specified) """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3199,7 +4393,8 @@ def var( Normalized by N-1 by default. Returns: - scalar or Series (if level specified): Variance. + scalar or bigframes.pandas.Series (if level specified): + Variance. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3276,6 +4471,30 @@ def mean(self): def median(self, *, exact: bool = True): """Return the median of the values over the requested axis. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3]) + >>> s.median() + np.float64(2.0) + + With a DataFrame + + >>> df = bpd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) + >>> df + a b + tiger 1 2 + zebra 2 3 + + [2 rows x 2 columns] + + >>> df.median() + a 1.5 + b 2.5 + dtype: float64 + Args: exact (bool. default True): Default True. 
Get the exact median instead of an approximate @@ -3297,6 +4516,7 @@ def quantile( >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([1, 2, 3, 4]) >>> s.quantile(.5) np.float64(2.5) @@ -3307,11 +4527,11 @@ def quantile( dtype: Float64 Args: - q (float or array-like, default 0.5 (50% quantile)): + q (Union[float, Sequence[float]], default 0.5 (50% quantile)): The quantile(s) to compute, which can lie in range: 0 <= q <= 1. Returns: - float or Series: + Union[float, bigframes.pandas.Series]: If ``q`` is an array, a Series will be returned where the index is ``q`` and the values are the quantiles, otherwise a float will be returned. @@ -3331,6 +4551,30 @@ def skew(self): Normalized by N-1. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3]) + >>> s.skew() + 0.0 + + With a DataFrame + + >>> df = bpd.DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4], 'c': [1, 3, 5]}, + ... index=['tiger', 'zebra', 'cow']) + >>> df + a b c + tiger 1 2 1 + zebra 2 3 3 + cow 3 4 5 + >>> df.skew() + a 0.0 + b 0.0 + c 0.0 + dtype: float64 + Returns: scalar: Scalar. """ @@ -3339,46 +4583,69 @@ def skew(self): def kurt(self): """Return unbiased kurtosis over requested axis. - Kurtosis obtained using Fisher’s definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1. + Kurtosis obtained using Fisher’s definition of kurtosis (kurtosis of + normal == 0.0). Normalized by N-1. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 2, 3], index=['cat', 'dog', 'dog', 'mouse']) + >>> s + cat 1 + dog 2 + dog 2 + mouse 3 + dtype: Int64 + np.float64(1.5) + + With a DataFrame + + >>> df = bpd.DataFrame({'a': [1, 2, 2, 3], 'b': [3, 4, 4, 4]}, + ...
index=['cat', 'dog', 'dog', 'mouse']) + >>> df + a b + cat 1 3 + dog 2 4 + dog 2 4 + mouse 3 4 + + [4 rows x 2 columns] + >>> df.kurt() + a 1.5 + b 4.0 + dtype: Float64 Returns: - scalar or scalar: Unbiased kurtosis over requested axis. + scalar: + Unbiased kurtosis over requested axis. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def items(self): """ - Iterate over (index, value) pairs of a Series. + Lazily iterate over (index, value) tuples. - Iterates over the Series contents, returning a tuple with - the index and the value of a Series. + This method returns an iterable of (index, value) tuples. + This is convenient if you want to create a lazy iterator. **Examples:** >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(['bear', 'bear', 'marsupial'], - ... index=['panda', 'polar', 'koala']) - >>> s - panda bear - polar bear - koala marsupial - dtype: string - + >>> s = bpd.Series(['A', 'B', 'C']) >>> for index, value in s.items(): - ... print(f'--> index: {index}') - ... print(f'--> value: {value}') - ... - --> index: panda - --> value: bear - --> index: polar - --> value: bear - --> index: koala - --> value: marsupial + ... print(f"Index : {index}, Value : {value}") + Index : 0, Value : A + Index : 1, Value : B + Index : 2, Value : C Returns: - Iterator: Iterator of index, value for each content of the Series. + iterable: + Iterable of tuples containing the (index, value) pairs from a + Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3445,7 +4712,7 @@ def where(self, cond, other): extension dtypes). Returns: - bigframes.series.Series: Series after the replacement. + bigframes.pandas.Series: Series after the replacement. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3567,7 +4834,8 @@ def mask(self, cond, other): extension dtypes). Returns: - bigframes.series.Series: Series after the replacement.
+ bigframes.pandas.Series: + Series after the replacement. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3580,13 +4848,16 @@ def clip(self, lower, upper): Args: lower (float or array-like, default None): - Minimum threshold value. All values below this threshold will be set to it. A missing threshold (e.g NA) will not clip the value. + Minimum threshold value. All values below this threshold will + be set to it. A missing threshold (e.g NA) will not clip the value. upper (float or array-like, default None): - Maximum threshold value. All values above this threshold will be set to it. A missing threshold (e.g NA) will not clip the value. + Maximum threshold value. All values above this threshold will + be set to it. A missing threshold (e.g NA) will not clip the value. Returns: - Series: Series. + bigframes.pandas.Series: + Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3594,20 +4865,17 @@ def unstack(self, level): """ Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. - Args: - level (int, str, or list of these, default last level): - Level(s) to unstack, can pass level name. - Returns: - DataFrame: Unstacked Series. + bigframes.pandas.DataFrame: Unstacked Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def argmax(self): """ - Return int position of the smallest value in the series. + Return int position of the largest value in the series. - If the minimum is achieved in multiple locations, the first row position is returned. + If the maximum is achieved in multiple locations, the first row position + is returned. **Examples:** @@ -3635,15 +4903,17 @@ def argmax(self): calories is the first element, since series is zero-indexed. Returns: - Series: Row position of the maximum value. + int: + Row position of the maximum value. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def argmin(self): """ - Return int position of the largest value in the Series. + Return int position of the smallest value in the Series. - If the maximum is achieved in multiple locations, the first row position is returned. + If the minimum is achieved in multiple locations, the first row position + is returned. **Examples:** @@ -3671,7 +4941,8 @@ def argmin(self): calories is the first element, since series is zero-indexed. Returns: - Series: Row position of the minimum value. + int: + Row position of the minimum value. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3721,7 +4992,8 @@ def rename(self, index, **kwargs) -> Series | None: attribute. Returns: - bigframes.series.Series: Series with index labels. + bigframes.pandas.Series: + Series with index labels. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3734,8 +5006,53 @@ def rename_axis(self, mapper, **kwargs): mapper (scalar, list-like, optional): Value to set the axis name attribute. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + Series + + >>> s = bpd.Series(["dog", "cat", "monkey"]) + >>> s + 0 dog + 1 cat + 2 monkey + dtype: string + + >>> s.rename_axis("animal") + animal + 0 dog + 1 cat + 2 monkey + dtype: string + + DataFrame + + >>> df = bpd.DataFrame({"num_legs": [4, 4, 2], + ... "num_arms": [0, 0, 2]}, + ... ["dog", "cat", "monkey"]) + >>> df + num_legs num_arms + dog 4 0 + cat 4 0 + monkey 2 2 + + [3 rows x 2 columns] + + >>> df = df.rename_axis("animal") + >>> df + num_legs num_arms + animal + dog 4 0 + cat 4 0 + monkey 2 2 + + [3 rows x 2 columns] + Returns: - bigframes.series.Series: Series with the name of the axis set. + bigframes.pandas.Series or bigframes.pandas.DataFrame: + The same type as the caller. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3819,7 +5136,8 @@ def value_counts( Don't include counts of NaN. Returns: - Series: Series containing counts of unique values. + bigframes.pandas.Series: + Series containing counts of unique values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3860,6 +5178,16 @@ def plot(self): """ Make plots of Series. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> ser = bpd.Series([1, 2, 3, 3]) + >>> plot = ser.plot(kind='hist', title="My plot") + >>> plot + + Returns: bigframes.operations.plotting.PlotAccessor: An accessor making plots. @@ -3894,7 +5222,9 @@ def isin(self, values): 5 hippo Name: animal, dtype: string - >>> s.isin(['cow', 'llama']) + To invert the boolean values, use the ~ operator: + + >>> ~s.isin(['cow', 'llama']) 0 True 1 True 2 True @@ -3903,6 +5233,18 @@ def isin(self, values): 5 False Name: animal, dtype: boolean + Passing a single string as s.isin('llama') will raise an error. Use a + list of one element instead: + + >>> s.isin(['llama']) + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: boolean + Strings and integers are distinct and are therefore not comparable: >>> bpd.Series([1]).isin(['1']) @@ -3918,7 +5260,7 @@ def isin(self, values): TypeError. Instead, turn a single string into a list of one element. Returns: - bigframes.series.Series: Series of booleans indicating if each element is in values. + bigframes.pandas.Series: Series of booleans indicating if each element is in values. Raises: TypeError: If input is not list-like. @@ -3944,7 +5286,8 @@ def is_monotonic_increasing(self) -> bool: np.False_ Returns: - bool: Boolean. + bool: + Boolean. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -3967,7 +5310,8 @@ def is_monotonic_decreasing(self) -> bool: np.False_ Returns: - bool: Boolean. + bool: + Boolean. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4051,7 +5395,8 @@ def map( index entry. Returns: - Series: Same index as caller. + bigframes.pandas.Series: + Same index as caller. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4059,8 +5404,79 @@ def map( def iloc(self): """Purely integer-location based indexing for selection by position. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, + ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, + ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000}] + >>> df = pd.DataFrame(mydict) + >>> df + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 + + [3 rows x 4 columns] + + Indexing just the rows + + With a scalar integer. + + >>> type(df.iloc[0]) + pandas.core.series.Series + + >>> df.iloc[0] + a 1 + b 2 + c 3 + d 4 + Name: 0, dtype: Int64 + + With a list of integers. + + >>> df.iloc[0] + a 1 + b 2 + c 3 + d 4 + Name: 0, dtype: Int64 + + >>> type(df.iloc[[0]]) + bigframes.dataframe.DataFrame + + >>> df.iloc[[0, 1]] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + + [2 rows x 4 columns] + + With a slice object. + + >>> df.iloc[:3] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 + + [3 rows x 4 columns] + + Indexing both axes + + You can mix the indexer types for the index and columns. Use : to select + the entire axis. + + With scalar integers. + + >>> df.iloc[0, 1] + np.int64(2) + Returns: - bigframes.core.indexers.IlocSeriesIndexer: Purely integer-location Indexers. + bigframes.core.indexers.IlocSeriesIndexer: + Purely integer-location Indexers. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4068,8 +5484,86 @@ def iloc(self): def loc(self): """Access a group of rows and columns by label(s) or a boolean array. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame([[1, 2], [4, 5], [7, 8]], + index=['cobra', 'viper', 'sidewinder'], + columns=['max_speed', 'shield']) + >>> df + max_speed shield + cobra 1 2 + viper 4 5 + sidewinder 7 8 + + [3 rows x 2 columns] + + Single label. Note this returns the row as a Series. + + >>> df.loc['viper'] + max_speed 4 + shield 5 + Name: viper, dtype: Int64 + + List of labels. Note using [[]] returns a DataFrame. + + >>> df.loc[['viper', 'sidewinder']] + max_speed shield + viper 4 5 + sidewinder 7 8 + + [2 rows x 2 columns] + + Slice with labels for row and single label for column. As mentioned + above, note that both the start and stop of the slice are included. + + >>> df.loc['cobra', 'shield'] + np.int64(2) + + Index (same behavior as df.reindex) + + >>> df.loc[bpd.Index(["cobra", "viper"], name="foo")] + max_speed shield + cobra 1 2 + viper 4 5 + + [2 rows x 2 columns] + + Conditional that returns a boolean Series with column labels specified + + >>> df.loc[df['shield'] > 6, ['max_speed']] + max_speed + sidewinder 7 + + [1 rows x 1 columns] + + Multiple conditional using | that returns a boolean Series + + >>> df.loc[(df['max_speed'] > 4) | (df['shield'] < 5)] + max_speed shield + cobra 1 2 + sidewinder 7 8 + + [2 rows x 2 columns] + + Please ensure that each condition is wrapped in parentheses (). + + Set value for an entire column + + >>> df.loc[:, 'max_speed'] = 30 + >>> df + max_speed shield + cobra 30 2 + viper 30 5 + sidewinder 30 8 + + [3 rows x 2 columns] + Returns: - bigframes.core.indexers.LocSeriesIndexer: Indexers object. + bigframes.core.indexers.LocSeriesIndexer: + Indexers object. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4080,21 +5574,31 @@ def iat(self): **Examples:** >>> import bigframes.pandas as bpd - >>> s = bpd.Series(bpd.Series([1, 2, 3])) >>> bpd.options.display.progress_bar = None - >>> s - 0 1 - 1 2 - 2 3 - dtype: Int64 - Get value at specified row number + >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], + ... columns=['A', 'B', 'C']) + >>> df + A B C + 0 0 2 3 + 1 0 4 1 + 2 10 20 30 + + [3 rows x 3 columns] + + Get value at specified row/column pair + + >>> df.iat[1, 2] + np.int64(1) + + Get value within a series - >>> s.iat[1] + >>> df.loc[0].iat[1] np.int64(2) Returns: - bigframes.core.indexers.IatSeriesIndexer: Indexers object. + bigframes.core.indexers.IatSeriesIndexer: + Indexers object. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4105,22 +5609,31 @@ def at(self): **Examples:** >>> import bigframes.pandas as bpd - >>> s = bpd.Series([1, 2, 3], index=['A', 'B', 'C']) >>> bpd.options.display.progress_bar = None - >>> s - A 1 - B 2 - C 3 - dtype: Int64 - Get value at specified row label + >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], + index=[4, 5, 6], columns=['A', 'B', 'C']) + >>> df + A B C + 4 0 2 3 + 5 0 4 1 + 6 10 20 30 + + [3 rows x 3 columns] + + Get value at specified row/column pair - >>> s.at['B'] + >>> df.at[4, 'B'] np.int64(2) + Get value at specified row label + + >>> df.loc[5].at['B'] + np.int64(4) Returns: - bigframes.core.indexers.AtSeriesIndexer: Indexers object. + bigframes.core.indexers.AtSeriesIndexer: + Indexers object. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4141,7 +5654,8 @@ def values(self): array(['a', 'a', 'b', 'c'], dtype=object) Returns: - numpy.ndarray or ndarray-like: Values in the Series. + numpy.ndarray or ndarray-like: + Values in the Series. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4157,7 +5671,11 @@ def size(self) -> int: For Series: - >>> s = bpd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) + 0 Ant + 1 Bear + 2 Cow + dtype: string >>> s.size 3 @@ -4168,7 +5686,8 @@ def size(self) -> int: 3 Returns: - int: Return the number of elements in the underlying data. + int: + Return the number of elements in the underlying data. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4236,7 +5755,8 @@ def __invert__(self): dtype: boolean Returns: - Series: The inverted values in the series. + bigframes.pandas.Series: + The inverted values in the series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4274,7 +5794,8 @@ def __and__(self, other): Object to bitwise AND with the Series. Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4312,7 +5833,8 @@ def __or__(self, other): Object to bitwise OR with the Series. Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4350,7 +5872,8 @@ def __xor__(self, other): Object to bitwise XOR with the Series. Returns: - bigframes.series.Series: The result of the operation. + bigframes.pandas.Series: + The result of the operation. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -4375,6 +5898,7 @@ def __getitem__(self, indexer): Index or slice of indices. Returns: - Series or Value: Value(s) at the requested index(es). + bigframes.pandas.Series or Value: + Value(s) at the requested index(es). 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 2986a9574f8de4e499980da8010f466b8af0eb7e Mon Sep 17 00:00:00 2001 From: Arwa Date: Wed, 8 Jan 2025 17:14:12 -0600 Subject: [PATCH 02/16] update the rest of the methods --- .../bigframes_vendored/pandas/core/series.py | 269 ++++++++++++++++-- 1 file changed, 251 insertions(+), 18 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index a32b0525b0..c5c02c0454 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -35,6 +35,52 @@ def dt(self): """ Accessor object for datetime-like properties of the Series values. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> seconds_series = bpd.Series(bpd.date_range("2000-01-01", periods=3, freq="s")) + >>> seconds_series + + >>> 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + dtype: timestamp[us][pyarrow] + + >>> seconds_series.dt.second + 0 0 + 1 1 + 2 2 + dtype: Int64 + + >>> hours_series = bpd.Series(pd.date_range("2000-01-01", periods=3, freq="h")) + >>> hours_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 01:00:00 + 2 2000-01-01 02:00:00 + dtype: timestamp[us][pyarrow] + + >>> hours_series.dt.hour + 0 0 + 1 1 + 2 2 + dtype: Int64 + + >>> quarters_series = bpd.Series(pd.date_range("2000-01-01", periods=3, freq="QE")) + >>> quarters_series + 0 2000-03-31 00:00:00 + 1 2000-06-30 00:00:00 + 2 2000-09-30 00:00:00 + dtype: timestamp[us][pyarrow] + + >>> quarters_series.dt.quarter + 0 1 + 1 2 + 2 3 + dtype: Int64 + Returns: bigframes.operations.datetimes.DatetimeMethods: An accessor containing datetime methods. @@ -105,7 +151,8 @@ def index(self): dtype=object) Returns: - Index: The index object of the Series. + Index: + The index object of the Series. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -168,6 +215,13 @@ def name(self) -> Hashable: >>> s.name 'Numbers' + >>> s.name = "Integers" + >>> s + 0 1 + 1 2 + 2 3 + Name: Integers, dtype: Int64 + If the Series is part of a DataFrame: >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) @@ -182,7 +236,8 @@ def name(self) -> Hashable: 'col1' Returns: - hashable object: The name of the Series, also the column name + hashable object: + The name of the Series, also the column name if part of a DataFrame. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -260,7 +315,8 @@ def transpose(self) -> Series: dtype: string Returns: - Series: Series. + bigframes.pandas.Series: + Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -324,6 +380,24 @@ def reset_index( 3 4 Name: foo, dtype: Int64 + >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']), + np.array(['one', 'two', 'one', 'two'])] + >>> s2 = pd.Series( + ... range(4), name='foo', + ... index=pd.MultiIndex.from_arrays(arrays, + ... names=['a', 'b'])) + + If level is not set, all levels are removed from the Index. + + >>> s2.reset_index() + a b foo + 0 bar one 0 + 1 bar two 1 + 2 baz one 2 + 3 baz two 3 + + [4 rows x 3 columns] + Args: drop (bool, default False): Just reset the index, without inserting it as a column in @@ -334,7 +408,8 @@ def reset_index( when `drop` is True. Returns: - Series or DataFrame or None; When `drop` is False (the default), + bigframes.pandas.Series or bigframes.pandas.DataFrame or None: + When `drop` is False (the default), a DataFrame is returned. The newly created columns will come first in the DataFrame, followed by the original Series values. When `drop` is True, a `Series` is returned. @@ -368,6 +443,15 @@ def to_string( """ Render a string representation of the Series. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> ser = bpd.Series([1, 2, 3]).to_string() + >>> ser + '0 1\n1 2\n2 3' + Args: buf (StringIO-like, optional): Buffer to write to. @@ -394,7 +478,8 @@ def to_string( of rows is above `max_rows`). Returns: - str or None: String representation of Series if ``buf=None``, + str or None: + String representation of Series if ``buf=None``, otherwise None. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -407,7 +492,36 @@ def to_markdown( **kwargs, ) -> str | None: """ - Print {klass} in Markdown-friendly format. + Print Series in Markdown-friendly format. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(["elk", "pig", "dog", "quetzal"], name="animal") + >>> print(s.to_markdown()) + | | animal | + |---:|:---------| + | 0 | elk | + | 1 | pig | + | 2 | dog | + | 3 | quetzal | + + Output markdown with a tabulate option. + + >>> print(s.to_markdown(tablefmt="grid")) + +----+----------+ + | | animal | + +====+==========+ + | 0 | elk | + +----+----------+ + | 1 | pig | + +----+----------+ + | 2 | dog | + +----+----------+ + | 3 | quetzal | + +----+----------+ Args: buf (str, Path or StringIO-like, optional, default None): @@ -418,7 +532,8 @@ def to_markdown( Add index (row) labels. Returns: - str: {klass} in Markdown-friendly format. + str: + Series in Markdown-friendly format. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -426,6 +541,23 @@ def to_dict(self, into: type[dict] = dict) -> Mapping: """ Convert Series to {label -> value} dict or dict-like object. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> from collections import OrderedDict, defaultdict + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3, 4]) + >>> s.to_dict() + {0: 1, 1: 2, 2: 3, 3: 4} + + >>> s.to_dict(into=OrderedDict) + OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) + + >>> dd = defaultdict(list) + >>> s.to_dict(into=dd) + defaultdict(list, {0: 1, 1: 2, 2: 3, 3: 4}) + Args: into (class, default dict): The collections.abc.Mapping subclass to use as the return @@ -434,7 +566,8 @@ def to_dict(self, into: type[dict] = dict) -> Mapping: collections.defaultdict, you must pass it initialized. Returns: - collections.abc.Mapping: Key-value representation of Series. + collections.abc.Mapping: + Key-value representation of Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -445,11 +578,27 @@ def to_frame(self, name=None) -> DataFrame: The column in the new dataframe will be named name (the keyword parameter) if the name parameter is provided and not None. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(["a", "b", "c"], + ... name="vals") + >>> s.to_frame() + vals + 0 a + 1 b + 2 c + + [3 rows x 1 columns] + Args: name (Hashable, default None) Returns: - bigframes.dataframe.DataFrame: DataFrame representation of Series. + bigframes.pandas .DataFrame: + DataFrame representation of Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -491,7 +640,8 @@ def to_latex(self, buf=None, columns=None, header=True, index=True, **kwargs): Write row names (index). Returns: - str or None: If buf is None, returns the result as a string. + str or None: + If buf is None, returns the result as a string. Otherwise returns None. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -520,7 +670,8 @@ def tolist(self) -> list: [1, 2, 3] Returns: - list: list of the values + list: + list of the values """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -530,6 +681,32 @@ def to_numpy(self, dtype, copy=False, na_value=None): """ A NumPy ndarray representing the values in this Series or Index. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pandas as pd + >>> bpd.options.display.progress_bar = None + + >>> ser = bpd.Series(pd.Categorical(['a', 'b', 'a'])) + >>> ser.to_numpy() + array(['a', 'b', 'a'], dtype=object) + + Specify the dtype to control how datetime-aware data is represented. Use + dtype=object to return an ndarray of pandas Timestamp objects, each with + the correct tz. + + >>> ser = bpd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> ser.to_numpy(dtype=object) + array([Timestamp('1999-12-31 23:00:00+0000', tz='UTC'), + Timestamp('2000-01-01 23:00:00+0000', tz='UTC')], dtype=object) + + Or ``dtype=datetime64[ns]`` to return an ndarray of native datetime64 values. + The values are converted to UTC and the timezone info is dropped. + + >>> ser.to_numpy(dtype="datetime64[ns]") + array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'], + dtype='datetime64[ns]') + Args: dtype (str or numpy.dtype, optional): The dtype to pass to :meth:`numpy.asarray`. @@ -546,7 +723,8 @@ def to_numpy(self, dtype, copy=False, na_value=None): of the underlying array (for extension arrays). Returns: - numpy.ndarray: A NumPy ndarray representing the values in this + numpy.ndarray: + A NumPy ndarray representing the values in this Series or Index. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -555,11 +733,43 @@ def to_pickle(self, path, **kwargs): """ Pickle (serialize) object to file. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> original_df = bpd.DataFrame({"foo": range(5), "bar": range(5, 10)}) + >>> original_df + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + + [5 rows x 2 columns] + + >>> original_df.to_pickle("./dummy.pkl") + + >>> unpickled_df = bpd.read_pickle("./dummy.pkl") + >>> unpickled_df + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + + [5 rows x 2 columns] + Args: path (str, path object, or file-like object): String, path object (implementing ``os.PathLike[str]``), or file-like object implementing a binary ``write()`` function. File path where the pickled object will be stored. + + Returns: + None """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -568,7 +778,8 @@ def to_xarray(self): Return an xarray object from the pandas object. Returns: - xarray.DataArray or xarray.Dataset: Data in the pandas structure + xarray.DataArray or xarray.Dataset: + Data in the pandas structure converted to Dataset if the object is a DataFrame, or a DataArray if the object is a Series. """ @@ -606,7 +817,8 @@ def agg(self, func): function names, e.g. ``['sum', 'mean']``. Returns: - scalar or Series: Aggregated results + scalar or bigframes.pandas.Series: + Aggregated results """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -629,8 +841,8 @@ def count(self): np.int64(2) Returns: - int or Series (if level specified): Number of non-null values in the - Series. + int or bigframes.pandas.Series (if level specified): + Number of non-null values in the Series. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -640,8 +852,26 @@ def nunique(self) -> int: Excludes NA values by default. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 3, 5, 7, 7]) + >>> s + 0 1 + 1 3 + 2 5 + 3 7 + 4 7 + dtype: Int64 + + >>> s.nunique() + np.int64(4) + Returns: - int: number of unique elements in the object. + int: + number of unique elements in the object. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -671,6 +901,7 @@ def unique(self, keep_order=True) -> Series: Name: A, dtype: Int64 Example with order preservation: Slower, but keeps order + >>> s.unique() 0 2 1 1 @@ -678,6 +909,7 @@ def unique(self, keep_order=True) -> Series: Name: A, dtype: Int64 Example without order preservation: Faster, but loses original order + >>> s.unique(keep_order=False) 0 1 1 2 @@ -685,7 +917,8 @@ def unique(self, keep_order=True) -> Series: Name: A, dtype: Int64 Returns: - bigframes.pandas.Series: The unique values returned as a Series. + bigframes.pandas.Series: + The unique values returned as a Series. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From f5a9c3b912f8696823adb7e2ba016bfb1ec33caa Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 9 Jan 2025 14:59:56 -0600 Subject: [PATCH 03/16] fix formatting --- .../bigframes_vendored/pandas/core/series.py | 80 +++++++------------ 1 file changed, 30 insertions(+), 50 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index c5c02c0454..8cf9d492cd 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -43,11 +43,10 @@ def dt(self): >>> seconds_series = bpd.Series(bpd.date_range("2000-01-01", periods=3, freq="s")) >>> seconds_series - - >>> 0 2000-01-01 00:00:00 - 1 2000-01-01 00:00:01 - 2 2000-01-01 00:00:02 - dtype: timestamp[us][pyarrow] + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + dtype: timestamp[us][pyarrow] >>> seconds_series.dt.second 0 0 @@ -143,12 +142,12 @@ def index(self): Name: Age, dtype: Int64 >>> s1.index # doctest: +ELLIPSIS MultiIndex([( 'Alice', 'Seattle'), - ( 'Bob', 'New York'), - ('Aritra', 'Kona')], - names=['Name', 'Location']) + ( 'Bob', 'New York'), + ('Aritra', 'Kona')], + names=['Name', 'Location']) >>> s1.index.values array([('Alice', 'Seattle'), ('Bob', 'New York'), ('Aritra', 'Kona')], - dtype=object) + dtype=object) Returns: Index: @@ -584,7 +583,7 @@ def to_frame(self, name=None) -> DataFrame: >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["a", "b", "c"], - ... name="vals") + ... name="vals") >>> s.to_frame() vals 0 a @@ -597,7 +596,7 @@ def to_frame(self, name=None) -> DataFrame: name (Hashable, default None) Returns: - bigframes.pandas .DataFrame: + bigframes.pandas.DataFrame: DataFrame representation of Series. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -671,7 +670,7 @@ def tolist(self) -> list: Returns: list: - list of the values + list of the values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -818,7 +817,7 @@ def agg(self, func): Returns: scalar or bigframes.pandas.Series: - Aggregated results + Aggregated results. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -933,32 +932,13 @@ def mode(self) -> Series: **Examples:** >>> import bigframes.pandas as bpd - >>> import pandas as pd >>> bpd.options.display.progress_bar = None - >>> bpd.Series([2, 1, 3, 3], name='A').unique() - 0 2 - 1 1 - 2 3 - Name: A, dtype: Int64 - - - >>> bpd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique() - 0 2016-01-01 00:00:00 - dtype: timestamp[us][pyarrow] - - An Categorical will return categories in the order of appearance and - with the same dtype. - - >>> bpd.Series(pd.Categorical(list('baabc'))).unique() - ['b', 'a', 'c'] - - >>> bpd.Series(pd.Categorical(list('baabc'), categories=list('abc'), - ... ordered=True)).unique() - 0 b - 1 a - 4 c - dtype: string + >>> s = bpd.Series([2, 4, 8, 2, 4, None]) + >>> s.mode() + 0 2.0 + 1 4.0 + dtype: Float64 Returns: bigframes.pandas.Series: @@ -982,7 +962,7 @@ def drop_duplicates( Generate a Series with duplicated entries. >>> s = bpd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', 'hippo'], - name='animal') + ... name='animal') >>> s 0 llama 1 cow @@ -1121,7 +1101,7 @@ def idxmin(self) -> Hashable: >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(data=[1, None, 4, 1], - ... index=['A', 'B', 'C', 'D']) + ... 
index=['A', 'B', 'C', 'D']) >>> s A 1.0 B @@ -1376,7 +1356,6 @@ def diff(self) -> Series: 5 6 dtype: Int64 - Difference with following row >>> s.diff(periods=-1) @@ -1604,9 +1583,9 @@ def sort_index( Specify index level to sort >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', - 'baz', 'baz', 'bar', 'bar']), - np.array(['two', 'one', 'two', 'one', - 'two', 'one', 'two', 'one'])] + ... 'baz', 'baz', 'bar', 'bar']), + ... np.array(['two', 'one', 'two', 'one', + ... 'two', 'one', 'two', 'one'])] >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) >>> s.sort_index(level=1) bar one 8 @@ -4870,7 +4849,7 @@ def items(self): >>> s = bpd.Series(['A', 'B', 'C']) >>> for index, value in s.items(): - print(f"Index : {index}, Value : {value}") + ... print(f"Index : {index}, Value : {value}") Index : 0, Value : A Index : 1, Value : B Index : 2, Value : C @@ -4945,7 +4924,8 @@ def where(self, cond, other): extension dtypes). Returns: - bigframes.pandas.Series: Series after the replacement. + bigframes.pandas.Series: + Series after the replacement. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -5263,8 +5243,8 @@ def rename_axis(self, mapper, **kwargs): DataFrame >>> df = bpd.DataFrame({"num_legs": [4, 4, 2], - ... "num_arms": [0, 0, 2]}, - ... ["dog", "cat", "monkey"]) + ... "num_arms": [0, 0, 2]}, + ... ["dog", "cat", "monkey"]) >>> df num_legs num_arms dog 4 0 @@ -5723,8 +5703,8 @@ def loc(self): >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([[1, 2], [4, 5], [7, 8]], - index=['cobra', 'viper', 'sidewinder'], - columns=['max_speed', 'shield']) + ... index=['cobra', 'viper', 'sidewinder'], + ... columns=['max_speed', 'shield']) >>> df max_speed shield cobra 1 2 @@ -5845,7 +5825,7 @@ def at(self): >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], - index=[4, 5, 6], columns=['A', 'B', 'C']) + ... 
index=[4, 5, 6], columns=['A', 'B', 'C']) >>> df A B C 4 0 2 3 From e7dd2965eaeca0015124d0f2a2de61d94b8a79e7 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 9 Jan 2025 15:09:55 -0600 Subject: [PATCH 04/16] fix indentation error --- third_party/bigframes_vendored/pandas/core/series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 8cf9d492cd..f45f1edb65 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -3734,6 +3734,7 @@ def rtruediv(self, other) -> Series: d e dtype: Float64 + Args: other (Series or scalar value) From 3b1c635fb793b5e69a438c4c6d739656d3216811 Mon Sep 17 00:00:00 2001 From: Arwa Date: Fri, 10 Jan 2025 12:30:10 -0600 Subject: [PATCH 05/16] fix some docstrings and a doctest error --- third_party/bigframes_vendored/pandas/core/series.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index f45f1edb65..8026be9287 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -449,7 +449,9 @@ def to_string( >>> ser = bpd.Series([1, 2, 3]).to_string() >>> ser - '0 1\n1 2\n2 3' + '0 1 + 1 2 + 2 3' Args: buf (StringIO-like, optional): @@ -478,8 +480,7 @@ def to_string( Returns: str or None: - String representation of Series if ``buf=None``, - otherwise None. + String representation of Series if ``buf=None``, otherwise None. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -2499,7 +2500,7 @@ def replace( string. Returns: - bigframes.pandas.Series/bigframes.pandas.DataFrame: + bigframes.pandas.Series or bigframes.pandas.DataFrame: Object after replacement. 
Raises: From 38b2358610745195d6588ba48d71d12c97a7447b Mon Sep 17 00:00:00 2001 From: Arwa Date: Fri, 10 Jan 2025 15:51:46 -0600 Subject: [PATCH 06/16] fix formatting --- .../bigframes_vendored/pandas/core/series.py | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 8026be9287..d5bfb1a2ef 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -1271,6 +1271,7 @@ def autocorr(self, lag: int = 1) -> float: >>> s = bpd.Series([0.25, 0.5, 0.2, -0.05]) >>> s.autocorr() # doctest: +ELLIPSIS np.float64(0.10355263309024067) + >>> s.autocorr(lag=2) np.float64(-1.0) @@ -1348,7 +1349,7 @@ def diff(self) -> Series: Difference with 3rd previous row - >>>s.diff(periods=3) + >>> s.diff(periods=3) 0 1 2 @@ -4335,8 +4336,8 @@ def combine_first(self, other) -> Series: 2 5.0 dtype: Float64 - Null values still persist if the location of that null value - does not exist in `other` + Null values still persist if the location of that null value + does not exist in `other` >>> s1 = bpd.Series({'falcon': np.nan, 'eagle': 160.0}) >>> s2 = bpd.Series({'eagle': 200.0, 'duck': 30.0}) @@ -4405,8 +4406,8 @@ def update(self, other) -> None: 2 6 dtype: Int64 - ``other`` can also be a non-Series object type - that is coercible into a Series + ``other`` can also be a non-Series object type + that is coercible into a Series >>> s = bpd.Series([1, 2, 3]) >>> s.update([4, np.nan, 6]) @@ -4467,10 +4468,13 @@ def any( >>> bpd.Series([False, False]).any() False + >>> bpd.Series([True, False]).any() True + >>> bpd.Series([], dtype="float64").any() False + >>> bpd.Series([np.nan]).any() False @@ -4502,6 +4506,7 @@ def max( 0 1 1 3 dtype: Int64 + >>> s.max() np.int64(3) @@ -4513,6 +4518,7 @@ def max( 1 3 2 dtype: Int64 + >>> s.max() np.int64(3) @@ -4542,6 +4548,7 @@ def min( 0 1 1 3 dtype: 
Int64 + >>> s.min() np.int64(1) @@ -4553,6 +4560,7 @@ def min( 1 3 2 dtype: Int64 + >>> s.min() np.int64(1) @@ -4629,6 +4637,7 @@ def sum(self): 0 1 1 3 dtype: Int64 + >>> s.sum() np.int64(4) @@ -4640,6 +4649,7 @@ def sum(self): 1 3 2 dtype: Int64 + >>> s.sum() np.int64(4) @@ -4663,6 +4673,7 @@ def mean(self): 0 1 1 3 dtype: Int64 + >>> s.mean() np.float64(2.0) @@ -4674,6 +4685,7 @@ def mean(self): 1 3 2 dtype: Int64 + >>> s.mean() np.float64(2.0) @@ -4734,6 +4746,7 @@ def quantile( >>> s = bpd.Series([1, 2, 3, 4]) >>> s.quantile(.5) np.float64(2.5) + >>> s.quantile([.25, .5, .75]) 0.25 1.75 0.5 2.5 @@ -4783,6 +4796,7 @@ def skew(self): tiger 1 2 1 zebra 2 3 3 cow 3 4 5 + >>> df.skew() a 0.0 b 0.0 @@ -4812,6 +4826,8 @@ def kurt(self): dog 2 mouse 3 dtype: Int64 + + >>> s.kurt() np.float64(1.5) With a DataFrame @@ -4826,6 +4842,7 @@ def kurt(self): mouse 3 4 [4 rows x 2 columns] + >>> df.kurt() a 1.5 b 4.0 From 8524bc752d97604f5ffa0bf7f91b857bff60ea77 Mon Sep 17 00:00:00 2001 From: Arwa Date: Fri, 10 Jan 2025 16:53:25 -0600 Subject: [PATCH 07/16] fix indentation and blankline error --- .../bigframes_vendored/pandas/core/series.py | 69 ++++++++++--------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index d5bfb1a2ef..4719617fe4 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -41,7 +41,7 @@ def dt(self): >>> import pandas as pd >>> bpd.options.display.progress_bar = None - >>> seconds_series = bpd.Series(bpd.date_range("2000-01-01", periods=3, freq="s")) + >>> seconds_series = bpd.Series(pd.date_range("2000-01-01", periods=3, freq="s")) >>> seconds_series 0 2000-01-01 00:00:00 1 2000-01-01 00:00:01 @@ -380,8 +380,8 @@ def reset_index( Name: foo, dtype: Int64 >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']), - np.array(['one', 'two', 'one', 'two'])] - >>> s2 = 
pd.Series( + ... np.array(['one', 'two', 'one', 'two'])] + >>> s2 = bpd.Series( ... range(4), name='foo', ... index=pd.MultiIndex.from_arrays(arrays, ... names=['a', 'b'])) @@ -549,7 +549,7 @@ def to_dict(self, into: type[dict] = dict) -> Mapping: >>> s = bpd.Series([1, 2, 3, 4]) >>> s.to_dict() - {0: 1, 1: 2, 2: 3, 3: 4} + {np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4} >>> s.to_dict(into=OrderedDict) OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) @@ -590,7 +590,7 @@ def to_frame(self, name=None) -> DataFrame: 0 a 1 b 2 c - + [3 rows x 1 columns] Args: @@ -746,7 +746,7 @@ def to_pickle(self, path, **kwargs): 2 2 7 3 3 8 4 4 9 - + [5 rows x 2 columns] >>> original_df.to_pickle("./dummy.pkl") @@ -1588,7 +1588,7 @@ def sort_index( ... 'baz', 'baz', 'bar', 'bar']), ... np.array(['two', 'one', 'two', 'one', ... 'two', 'one', 'two', 'one'])] - >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) + >>> s = bpd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) >>> s.sort_index(level=1) bar one 8 baz one 6 @@ -1644,11 +1644,11 @@ def nlargest( >>> bpd.options.display.progress_bar = None >>> countries_population = {"Italy": 59000000, "France": 65000000, - "Malta": 434000, "Maldives": 434000, - "Brunei": 434000, "Iceland": 337000, - "Nauru": 11300, "Tuvalu": 11300, - "Anguilla": 11300, "Montserrat": 5200} - >>> s = pd.Series(countries_population) + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} + >>> s = bpd.Series(countries_population) >>> s Italy 59000000 France 65000000 @@ -1730,10 +1730,10 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: >>> bpd.options.display.progress_bar = None >>> countries_population = {"Italy": 59000000, "France": 65000000, - "Brunei": 434000, "Malta": 434000, - "Maldives": 434000, "Iceland": 337000, - "Nauru": 11300, "Tuvalu": 11300, - "Anguilla": 11300, "Montserrat": 5200} + ... 
"Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} >>> s = bpd.Series(countries_population) >>> s Italy 59000000 @@ -2185,7 +2185,7 @@ def drop( Drop 2nd level label in MultiIndex Series: >>> import pandas as pd - >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], + >>> midx = bpd.MultiIndex(levels=[['llama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) @@ -2825,7 +2825,7 @@ def cummin(self): >>> import numpy as np >>> bpd.options.display.progress_bar = None - >>> s = pd.Series([2, np.nan, 5, -1, 0]) + >>> s = bpd.Series([2, np.nan, 5, -1, 0]) >>> s 0 2.0 1 @@ -4710,10 +4710,10 @@ def median(self, *, exact: bool = True): >>> df = bpd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) >>> df - a b + a b tiger 1 2 zebra 2 3 - + [2 rows x 2 columns] >>> df.median() @@ -4783,7 +4783,7 @@ def skew(self): >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> s = pd.Series([1, 2, 3]) + >>> s = bpd.Series([1, 2, 3]) >>> s.skew() 0.0 @@ -4835,12 +4835,12 @@ def kurt(self): >>> df = bpd.DataFrame({'a': [1, 2, 2, 3], 'b': [3, 4, 4, 4]}, ... index=['cat', 'dog', 'dog', 'mouse']) >>> df - a b + a b cat 1 3 dog 2 4 dog 2 4 mouse 3 4 - + [4 rows x 2 columns] >>> df.kurt() @@ -5269,7 +5269,7 @@ def rename_axis(self, mapper, **kwargs): dog 4 0 cat 4 0 monkey 2 2 - + [3 rows x 2 columns] >>> df = df.rename_axis("animal") @@ -5457,12 +5457,12 @@ def isin(self, values): To invert the boolean values, use the ~ operator: >>> ~s.isin(['cow', 'llama']) - 0 True - 1 True - 2 True - 3 False - 4 True - 5 False + 0 False + 1 False + 2 False + 3 True + 4 False + 5 True Name: animal, dtype: boolean Passing a single string as s.isin('llama') will raise an error. 
Use a @@ -5515,7 +5515,7 @@ def is_monotonic_increasing(self) -> bool: >>> s = bpd.Series([3, 2, 1]) >>> s.is_monotonic_increasing - np.False_ + False Returns: bool: @@ -5641,10 +5641,10 @@ def iloc(self): >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, + >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000}] - >>> df = pd.DataFrame(mydict) + >>> df = bpd.DataFrame(mydict) >>> df a b c d 0 1 2 3 4 @@ -5729,7 +5729,7 @@ def loc(self): cobra 1 2 viper 4 5 sidewinder 7 8 - + [3 rows x 2 columns] Single label. Note this returns the row as a Series. @@ -5904,6 +5904,7 @@ def size(self) -> int: For Series: >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) + >>> s 0 Ant 1 Bear 2 Cow From 4be237cd76526164dbda5754165d4c1cdbf489db Mon Sep 17 00:00:00 2001 From: Arwa Date: Fri, 10 Jan 2025 16:59:37 -0600 Subject: [PATCH 08/16] fix indentation --- third_party/bigframes_vendored/pandas/core/series.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 4719617fe4..9d19bd650b 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -1644,10 +1644,10 @@ def nlargest( >>> bpd.options.display.progress_bar = None >>> countries_population = {"Italy": 59000000, "France": 65000000, - ... "Malta": 434000, "Maldives": 434000, - ... "Brunei": 434000, "Iceland": 337000, - ... "Nauru": 11300, "Tuvalu": 11300, - ... "Anguilla": 11300, "Montserrat": 5200} + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... 
"Anguilla": 11300, "Montserrat": 5200} >>> s = bpd.Series(countries_population) >>> s Italy 59000000 From 8c740ea97794fc856b2d7bb04153cbf636fd7038 Mon Sep 17 00:00:00 2001 From: Arwa Date: Fri, 10 Jan 2025 17:02:30 -0600 Subject: [PATCH 09/16] fix indentation --- third_party/bigframes_vendored/pandas/core/series.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 9d19bd650b..e89871409a 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -1730,10 +1730,10 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: >>> bpd.options.display.progress_bar = None >>> countries_population = {"Italy": 59000000, "France": 65000000, - ... "Malta": 434000, "Maldives": 434000, - ... "Brunei": 434000, "Iceland": 337000, - ... "Nauru": 11300, "Tuvalu": 11300, - ... "Anguilla": 11300, "Montserrat": 5200} + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... 
"Anguilla": 11300, "Montserrat": 5200} >>> s = bpd.Series(countries_population) >>> s Italy 59000000 From 940e5e28ce47e8e2624b961f7dd3b380c60ad6bc Mon Sep 17 00:00:00 2001 From: Arwa Date: Tue, 14 Jan 2025 11:37:10 -0600 Subject: [PATCH 10/16] fix kokoro doctest errors --- .../bigframes_vendored/pandas/core/series.py | 91 ++++++------------- 1 file changed, 30 insertions(+), 61 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index e89871409a..604bd3f351 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -259,7 +259,7 @@ def hasnans(self) -> bool: 3 dtype: Float64 >>> s.hasnans - np.True_ + True Returns: bool @@ -394,7 +394,7 @@ def reset_index( 1 bar two 1 2 baz one 2 3 baz two 3 - + [4 rows x 3 columns] Args: @@ -552,7 +552,7 @@ def to_dict(self, into: type[dict] = dict) -> Mapping: {np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4} >>> s.to_dict(into=OrderedDict) - OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) + OrderedDict({np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4}) >>> dd = defaultdict(list) >>> s.to_dict(into=dd) @@ -753,13 +753,13 @@ def to_pickle(self, path, **kwargs): >>> unpickled_df = bpd.read_pickle("./dummy.pkl") >>> unpickled_df - foo bar + foo bar 0 0 5 1 1 6 2 2 7 3 3 8 4 4 9 - + [5 rows x 2 columns] Args: @@ -1582,37 +1582,6 @@ def sort_index( 3.0 a dtype: string - Specify index level to sort - - >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', - ... 'baz', 'baz', 'bar', 'bar']), - ... np.array(['two', 'one', 'two', 'one', - ... 
'two', 'one', 'two', 'one'])] - >>> s = bpd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) - >>> s.sort_index(level=1) - bar one 8 - baz one 6 - foo one 4 - qux one 2 - bar two 7 - baz two 5 - foo two 3 - qux two 1 - dtype: int64 - - Does not sort by remaining levels when sorting by levels - - >>> s.sort_index(level=1, sort_remaining=False) - qux one 2 - foo one 4 - baz one 6 - bar one 8 - qux two 1 - foo two 3 - baz two 5 - bar two 7 - dtype: int64 - Args: axis ({0 or 'index'}): Unused. Parameter needed for compatibility with DataFrame. @@ -1660,7 +1629,7 @@ def nlargest( Tuvalu 11300 Anguilla 11300 Montserrat 5200 - dtype: int64 + dtype: Int64 The n largest elements where `n=5` by default. @@ -1670,7 +1639,7 @@ def nlargest( Malta 434000 Maldives 434000 Brunei 434000 - dtype: int64 + dtype: Int64 The n largest elements where `n=3`. Default keep value is `first` so Malta will be kept. @@ -1679,7 +1648,7 @@ def nlargest( France 65000000 Italy 59000000 Malta 434000 - dtype: int64 + dtype: Int64 The n largest elements where `n=3` and keeping the last duplicates. Brunei will be kept since it is the last with value 434000 based on the index order. @@ -1688,7 +1657,7 @@ def nlargest( France 65000000 Italy 59000000 Brunei 434000 - dtype: int64 + dtype: Int64 The n largest elements where n`=3` with all duplicates kept. Note that the returned Series has five elements due to the three duplicates. @@ -1699,7 +1668,7 @@ def nlargest( Malta 434000 Maldives 434000 Brunei 434000 - dtype: int64 + dtype: Int64 Args: n (int, default 5): @@ -2185,7 +2154,7 @@ def drop( Drop 2nd level label in MultiIndex Series: >>> import pandas as pd - >>> midx = bpd.MultiIndex(levels=[['llama', 'cow', 'falcon'], + >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], ... 
[0, 1, 2, 0, 1, 2, 0, 1, 2]]) @@ -4717,9 +4686,9 @@ def median(self, *, exact: bool = True): [2 rows x 2 columns] >>> df.median() - a 1.5 - b 2.5 - dtype: float64 + a 1.5 + b 2.5 + dtype: Float64 Args: exact (bool. default True): @@ -4785,7 +4754,7 @@ def skew(self): >>> s = bpd.Series([1, 2, 3]) >>> s.skew() - 0.0 + np.float64(0.0) With a DataFrame @@ -4801,7 +4770,7 @@ def skew(self): a 0.0 b 0.0 c 0.0 - dtype: float64 + dtype: Float64 Returns: scalar: Scalar. @@ -5279,7 +5248,7 @@ def rename_axis(self, mapper, **kwargs): dog 4 0 cat 4 0 monkey 2 2 - + [3 rows x 2 columns] Returns: @@ -5511,7 +5480,7 @@ def is_monotonic_increasing(self) -> bool: >>> s = bpd.Series([1, 2, 2]) >>> s.is_monotonic_increasing - np.True_ + True >>> s = bpd.Series([3, 2, 1]) >>> s.is_monotonic_increasing @@ -5535,11 +5504,11 @@ def is_monotonic_decreasing(self) -> bool: >>> s = bpd.Series([3, 2, 2, 1]) >>> s.is_monotonic_decreasing - np.True_ + True >>> s = bpd.Series([1, 2, 3]) >>> s.is_monotonic_decreasing - np.False_ + False Returns: bool: @@ -5650,7 +5619,7 @@ def iloc(self): 0 1 2 3 4 1 100 200 300 400 2 1000 2000 3000 4000 - + [3 rows x 4 columns] Indexing just the rows @@ -5658,7 +5627,7 @@ def iloc(self): With a scalar integer. >>> type(df.iloc[0]) - pandas.core.series.Series + >>> df.iloc[0] a 1 @@ -5683,7 +5652,7 @@ def iloc(self): a b c d 0 1 2 3 4 1 100 200 300 400 - + [2 rows x 4 columns] With a slice object. @@ -5693,7 +5662,7 @@ def iloc(self): 0 1 2 3 4 1 100 200 300 400 2 1000 2000 3000 4000 - + [3 rows x 4 columns] Indexing both axes @@ -5745,7 +5714,7 @@ def loc(self): max_speed shield viper 4 5 sidewinder 7 8 - + [2 rows x 2 columns] Slice with labels for row and single label for column. 
As mentioned @@ -5760,7 +5729,7 @@ def loc(self): max_speed shield cobra 1 2 viper 4 5 - + [2 rows x 2 columns] Conditional that returns a boolean Series with column labels specified @@ -5768,7 +5737,7 @@ def loc(self): >>> df.loc[df['shield'] > 6, ['max_speed']] max_speed sidewinder 7 - + [1 rows x 1 columns] Multiple conditional using | that returns a boolean Series @@ -5777,7 +5746,7 @@ def loc(self): max_speed shield cobra 1 2 sidewinder 7 8 - + [2 rows x 2 columns] Please ensure that each condition is wrapped in parentheses (). @@ -5790,7 +5759,7 @@ def loc(self): cobra 30 2 viper 30 5 sidewinder 30 8 - + [3 rows x 2 columns] Returns: @@ -5850,7 +5819,7 @@ def at(self): 4 0 2 3 5 0 4 1 6 10 20 30 - + [3 rows x 3 columns] Get value at specified row/column pair From 6155970ba23b28469f573b24f4c66c86a9987e4a Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 16 Jan 2025 13:25:33 -0600 Subject: [PATCH 11/16] fix doctest --- .../bigframes_vendored/pandas/core/series.py | 81 ++++++++++--------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 604bd3f351..0b2ffde58b 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -335,6 +335,7 @@ def reset_index( **Examples:** >>> import bigframes.pandas as bpd + >>> import pandas as pd >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3, 4], name='foo', @@ -549,14 +550,14 @@ def to_dict(self, into: type[dict] = dict) -> Mapping: >>> s = bpd.Series([1, 2, 3, 4]) >>> s.to_dict() - {np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4} + {0: 1, 1: 2, 2: 3, 3: 4} >>> s.to_dict(into=OrderedDict) - OrderedDict({np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4}) + OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> dd = defaultdict(list) >>> s.to_dict(into=dd) - defaultdict(list, {0: 1, 1: 2, 2: 3, 
3: 4}) + defaultdict(, {0: 1, 1: 2, 2: 3, 3: 4}) Args: into (class, default dict): @@ -803,7 +804,7 @@ def agg(self, func): dtype: Int64 >>> s.agg('min') - np.int64(1) + 1 >>> s.agg(['min', 'max']) min 1 @@ -838,7 +839,7 @@ def count(self): 2 dtype: Float64 >>> s.count() - np.int64(2) + 2 Returns: int or bigframes.pandas.Series (if level specified): @@ -867,7 +868,7 @@ def nunique(self) -> int: dtype: Int64 >>> s.nunique() - np.int64(4) + 4 Returns: int: @@ -1233,12 +1234,12 @@ def corr(self, other, method="pearson", min_periods=None) -> float: >>> s1 = bpd.Series([.2, .0, .6, .2]) >>> s2 = bpd.Series([.3, .6, .0, .1]) >>> s1.corr(s2) - np.float64(-0.8510644963469901) + -0.8510644963469901 >>> s1 = bpd.Series([1, 2, 3], index=[0, 1, 2]) >>> s2 = bpd.Series([1, 2, 3], index=[2, 1, 0]) >>> s1.corr(s2) - np.float64(-1.0) + -1.0 Args: other (Series): @@ -1270,16 +1271,16 @@ def autocorr(self, lag: int = 1) -> float: >>> s = bpd.Series([0.25, 0.5, 0.2, -0.05]) >>> s.autocorr() # doctest: +ELLIPSIS - np.float64(0.10355263309024067) + 0.10355263309024067 >>> s.autocorr(lag=2) - np.float64(-1.0) + -1.0 If the Pearson correlation is not well defined, then 'NaN' is returned. 
>>> s = bpd.Series([1, 0, 0, 0]) >>> s.autocorr() - np.float64(nan) + nan Args: lag (int, default 1): @@ -1309,7 +1310,7 @@ def cov( >>> s1 = bpd.Series([0.90010907, 0.13484424, 0.62036035]) >>> s2 = bpd.Series([0.12528585, 0.26962463, 0.51111198]) >>> s1.cov(s2) - np.float64(-0.01685762652715874) + -0.01685762652715874 Args: other (Series): @@ -1404,12 +1405,12 @@ def dot(self, other) -> Series | np.ndarray: >>> s = bpd.Series([0, 1, 2, 3]) >>> other = bpd.Series([-1, 2, -3, 4]) >>> s.dot(other) - np.int64(8) + 8 You can also use the operator ``@`` for the dot product: >>> s @ other - np.int64(8) + 8 Args: other (Series): @@ -1707,9 +1708,9 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: >>> s Italy 59000000 France 65000000 - Brunei 434000 Malta 434000 Maldives 434000 + Brunei 434000 Iceland 337000 Nauru 11300 Tuvalu 11300 @@ -4477,7 +4478,7 @@ def max( dtype: Int64 >>> s.max() - np.int64(3) + 3 Calculating the max of a Series containing ``NA`` values: @@ -4489,7 +4490,7 @@ def max( dtype: Int64 >>> s.max() - np.int64(3) + 3 Returns: scalar: Scalar. @@ -4519,7 +4520,7 @@ def min( dtype: Int64 >>> s.min() - np.int64(1) + 1 Calculating the min of a Series containing ``NA`` values: @@ -4531,7 +4532,7 @@ def min( dtype: Int64 >>> s.min() - np.int64(1) + 1 Returns: scalar: Scalar. @@ -4608,7 +4609,7 @@ def sum(self): dtype: Int64 >>> s.sum() - np.int64(4) + 4 Calculating the sum of a Series containing ``NA`` values: @@ -4620,7 +4621,7 @@ def sum(self): dtype: Int64 >>> s.sum() - np.int64(4) + 4 Returns: scalar: Scalar. @@ -4644,7 +4645,7 @@ def mean(self): dtype: Int64 >>> s.mean() - np.float64(2.0) + 2.0 Calculating the mean of a Series containing ``NA`` values: @@ -4656,7 +4657,7 @@ def mean(self): dtype: Int64 >>> s.mean() - np.float64(2.0) + 2.0 Returns: scalar: Scalar. 
@@ -4673,7 +4674,7 @@ def median(self, *, exact: bool = True): >>> s = bpd.Series([1, 2, 3]) >>> s.median() - np.float64(2.0) + 2.0 With a DataFrame @@ -4714,7 +4715,7 @@ def quantile( >>> s = bpd.Series([1, 2, 3, 4]) >>> s.quantile(.5) - np.float64(2.5) + 2.5 >>> s.quantile([.25, .5, .75]) 0.25 1.75 @@ -4754,7 +4755,7 @@ def skew(self): >>> s = bpd.Series([1, 2, 3]) >>> s.skew() - np.float64(0.0) + 0.0 With a DataFrame @@ -4765,6 +4766,8 @@ def skew(self): tiger 1 2 1 zebra 2 3 3 cow 3 4 5 + + [3 rows x 3 columns] >>> df.skew() a 0.0 @@ -4797,7 +4800,7 @@ def kurt(self): dtype: Int64 >>> s.kurt() - np.float64(1.5) + 1.5 With a DataFrame @@ -5095,10 +5098,10 @@ def argmax(self): dtype: Float64 >>> s.argmax() - np.int64(2) + 2 >>> s.argmin() - np.int64(0) + 0 The maximum cereal calories is the third element and the minimum cereal calories is the first element, since series is zero-indexed. @@ -5133,10 +5136,10 @@ def argmin(self): dtype: Float64 >>> s.argmax() - np.int64(2) + 2 >>> s.argmin() - np.int64(0) + 0 The maximum cereal calories is the third element and the minimum cereal calories is the first element, since series is zero-indexed. @@ -5646,7 +5649,7 @@ def iloc(self): Name: 0, dtype: Int64 >>> type(df.iloc[[0]]) - bigframes.dataframe.DataFrame + >>> df.iloc[[0, 1]] a b c d @@ -5673,7 +5676,7 @@ def iloc(self): With scalar integers. >>> df.iloc[0, 1] - np.int64(2) + 2 Returns: bigframes.core.indexers.IlocSeriesIndexer: @@ -5721,7 +5724,7 @@ def loc(self): above, note that both the start and stop of the slice are included. 
>>> df.loc['cobra', 'shield'] - np.int64(2) + 2 Index (same behavior as df.reindex) @@ -5790,12 +5793,12 @@ def iat(self): Get value at specified row/column pair >>> df.iat[1, 2] - np.int64(1) + 1 Get value within a series >>> df.loc[0].iat[1] - np.int64(2) + 2 Returns: bigframes.core.indexers.IatSeriesIndexer: @@ -5825,12 +5828,12 @@ def at(self): Get value at specified row/column pair >>> df.at[4, 'B'] - np.int64(2) + 2 Get value at specified row label >>> df.loc[5].at['B'] - np.int64(4) + 4 Returns: bigframes.core.indexers.AtSeriesIndexer: @@ -6089,7 +6092,7 @@ def __getitem__(self, indexer): >>> s = bpd.Series([15, 30, 45]) >>> s[1] - np.int64(30) + 30 >>> s[0:2] 0 15 1 30 From 320a6826e3e34d91800aa0276767a364e9306b2f Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 16 Jan 2025 14:09:56 -0600 Subject: [PATCH 12/16] fix kokoro doctest --- .../bigframes_vendored/pandas/core/series.py | 87 +++++++++---------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 65876a2ad0..8858dc0cea 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -259,7 +259,7 @@ def hasnans(self) -> bool: 3 dtype: Float64 >>> s.hasnans - True + np.True_ Returns: bool @@ -449,10 +449,8 @@ def to_string( >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series([1, 2, 3]).to_string() - >>> ser - '0 1 - 1 2 - 2 3' + >>> ser #doctest: +SKIP + '0 1\n1 2\n2 3' Args: buf (StringIO-like, optional): @@ -550,7 +548,7 @@ def to_dict(self, into: type[dict] = dict) -> Mapping: >>> s = bpd.Series([1, 2, 3, 4]) >>> s.to_dict() - {0: 1, 1: 2, 2: 3, 3: 4} + {np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4} >>> s.to_dict(into=OrderedDict) OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) @@ -804,7 +802,7 @@ def agg(self, func): dtype: Int64 >>> s.agg('min') - 1 + np.int64(1) >>> s.agg(['min', 
'max']) min 1 @@ -868,7 +866,7 @@ def nunique(self) -> int: dtype: Int64 >>> s.nunique() - 4 + np.int64(4) Returns: int: @@ -1234,12 +1232,12 @@ def corr(self, other, method="pearson", min_periods=None) -> float: >>> s1 = bpd.Series([.2, .0, .6, .2]) >>> s2 = bpd.Series([.3, .6, .0, .1]) >>> s1.corr(s2) - -0.8510644963469901 + np.float64(-0.8510644963469901) >>> s1 = bpd.Series([1, 2, 3], index=[0, 1, 2]) >>> s2 = bpd.Series([1, 2, 3], index=[2, 1, 0]) >>> s1.corr(s2) - -1.0 + np.float64(-1.0) Args: other (Series): @@ -1271,10 +1269,10 @@ def autocorr(self, lag: int = 1) -> float: >>> s = bpd.Series([0.25, 0.5, 0.2, -0.05]) >>> s.autocorr() # doctest: +ELLIPSIS - 0.10355263309024067 + np.float64(0.10355263309024067) >>> s.autocorr(lag=2) - -1.0 + np.float64(-1.0) If the Pearson correlation is not well defined, then 'NaN' is returned. @@ -1310,7 +1308,7 @@ def cov( >>> s1 = bpd.Series([0.90010907, 0.13484424, 0.62036035]) >>> s2 = bpd.Series([0.12528585, 0.26962463, 0.51111198]) >>> s1.cov(s2) - -0.01685762652715874 + np.float64(-0.01685762652715874) Args: other (Series): @@ -1405,12 +1403,12 @@ def dot(self, other) -> Series | np.ndarray: >>> s = bpd.Series([0, 1, 2, 3]) >>> other = bpd.Series([-1, 2, -3, 4]) >>> s.dot(other) - 8 + np.int64(8) You can also use the operator ``@`` for the dot product: >>> s @ other - 8 + np.int64(8) Args: other (Series): @@ -4459,16 +4457,16 @@ def any( For Series input, the output is a scalar indicating whether any element is True. >>> bpd.Series([False, False]).any() - False + np.False_ >>> bpd.Series([True, False]).any() - True + np.True_ >>> bpd.Series([], dtype="float64").any() - False + np.False_ >>> bpd.Series([np.nan]).any() - False + np.False_ Returns: scalar or bigframes.pandas.Series: @@ -4500,7 +4498,7 @@ def max( dtype: Int64 >>> s.max() - 3 + np.int64(3) Calculating the max of a Series containing ``NA`` values: @@ -4512,7 +4510,7 @@ def max( dtype: Int64 >>> s.max() - 3 + np.int64(3) Returns: scalar: Scalar. 
@@ -4542,7 +4540,7 @@ def min( dtype: Int64 >>> s.min() - 1 + np.int64(1) Calculating the min of a Series containing ``NA`` values: @@ -4554,7 +4552,7 @@ def min( dtype: Int64 >>> s.min() - 1 + np.int64(1) Returns: scalar: Scalar. @@ -4631,7 +4629,7 @@ def sum(self): dtype: Int64 >>> s.sum() - 4 + np.int64(4) Calculating the sum of a Series containing ``NA`` values: @@ -4643,7 +4641,7 @@ def sum(self): dtype: Int64 >>> s.sum() - 4 + np.int64(4) Returns: scalar: Scalar. @@ -4667,7 +4665,7 @@ def mean(self): dtype: Int64 >>> s.mean() - 2.0 + np.float64(2.0) Calculating the mean of a Series containing ``NA`` values: @@ -4679,7 +4677,7 @@ def mean(self): dtype: Int64 >>> s.mean() - 2.0 + np.float64(2.0) Returns: scalar: Scalar. @@ -4696,7 +4694,7 @@ def median(self, *, exact: bool = True): >>> s = bpd.Series([1, 2, 3]) >>> s.median() - 2.0 + np.float64(2.0) With a DataFrame @@ -4737,7 +4735,7 @@ def quantile( >>> s = bpd.Series([1, 2, 3, 4]) >>> s.quantile(.5) - 2.5 + np.float64(2.5) >>> s.quantile([.25, .5, .75]) 0.25 1.75 @@ -4777,7 +4775,7 @@ def skew(self): >>> s = bpd.Series([1, 2, 3]) >>> s.skew() - 0.0 + np.float64(0.0) With a DataFrame @@ -4822,7 +4820,7 @@ def kurt(self): dtype: Int64 >>> s.kurt() - 1.5 + np.float64(1.5) With a DataFrame @@ -5120,10 +5118,10 @@ def argmax(self): dtype: Float64 >>> s.argmax() - 2 + np.int64(2) >>> s.argmin() - 0 + np.int64(0) The maximum cereal calories is the third element and the minimum cereal calories is the first element, since series is zero-indexed. @@ -5158,10 +5156,10 @@ def argmin(self): dtype: Float64 >>> s.argmax() - 2 + np.int64(2) >>> s.argmin() - 0 + np.int64(0) The maximum cereal calories is the third element and the minimum cereal calories is the first element, since series is zero-indexed. 
@@ -5529,11 +5527,11 @@ def is_monotonic_decreasing(self) -> bool: >>> s = bpd.Series([3, 2, 2, 1]) >>> s.is_monotonic_decreasing - True + np.True_ >>> s = bpd.Series([1, 2, 3]) >>> s.is_monotonic_decreasing - False + np.False_ Returns: bool: @@ -5698,7 +5696,7 @@ def iloc(self): With scalar integers. >>> df.iloc[0, 1] - 2 + np.int64(2) Returns: bigframes.core.indexers.IlocSeriesIndexer: @@ -5746,7 +5744,7 @@ def loc(self): above, note that both the start and stop of the slice are included. >>> df.loc['cobra', 'shield'] - 2 + np.int64(2) Index (same behavior as df.reindex) @@ -5815,12 +5813,12 @@ def iat(self): Get value at specified row/column pair >>> df.iat[1, 2] - 1 + np.int64(1) Get value within a series >>> df.loc[0].iat[1] - 2 + np.int64(2) Returns: bigframes.core.indexers.IatSeriesIndexer: @@ -5850,12 +5848,12 @@ def at(self): Get value at specified row/column pair >>> df.at[4, 'B'] - 2 + np.int64(2) Get value at specified row label >>> df.loc[5].at['B'] - 4 + np.int64(4) Returns: bigframes.core.indexers.AtSeriesIndexer: @@ -6114,7 +6112,8 @@ def __getitem__(self, indexer): >>> s = bpd.Series([15, 30, 45]) >>> s[1] - 30 + np.int64(30) + >>> s[0:2] 0 15 1 30 From 18e704c9949d96e185fe01961be498fd17e03531 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 16 Jan 2025 14:18:19 -0600 Subject: [PATCH 13/16] fix kokoro doctest --- third_party/bigframes_vendored/pandas/core/series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 8858dc0cea..197a820cf0 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -450,6 +450,7 @@ def to_string( >>> ser = bpd.Series([1, 2, 3]).to_string() >>> ser #doctest: +SKIP + '0 1\n1 2\n2 3' Args: From 70d11cefef2769d7effe01c648a5d326638b6797 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 16 Jan 2025 14:21:34 -0600 Subject: [PATCH 14/16] remove 
.to_string code example --- third_party/bigframes_vendored/pandas/core/series.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 197a820cf0..9434cf8384 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -443,16 +443,6 @@ def to_string( """ Render a string representation of the Series. - **Examples:** - - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - - >>> ser = bpd.Series([1, 2, 3]).to_string() - >>> ser #doctest: +SKIP - - '0 1\n1 2\n2 3' - Args: buf (StringIO-like, optional): Buffer to write to. From fa7e042555014620792ac28e2a56bda68f1f31c9 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 16 Jan 2025 15:13:23 -0600 Subject: [PATCH 15/16] fix kokoro doctest --- .../bigframes_vendored/pandas/core/series.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 9434cf8384..bc05f27a6f 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -542,7 +542,7 @@ def to_dict(self, into: type[dict] = dict) -> Mapping: {np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4} >>> s.to_dict(into=OrderedDict) - OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) + OrderedDict({np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4}) >>> dd = defaultdict(list) >>> s.to_dict(into=dd) @@ -828,7 +828,7 @@ def count(self): 2 dtype: Float64 >>> s.count() - 2 + np.int64(2) Returns: int or bigframes.pandas.Series (if level specified): @@ -974,7 +974,7 @@ def drop_duplicates( 5 hippo Name: animal, dtype: string - The value ‘last’ for parameter ‘keep’ keeps the last occurrence for + The value 'last' for parameter 'keep' keeps the last 
occurrence for each set of duplicated entries. >>> s.drop_duplicates(keep='last') @@ -984,7 +984,7 @@ def drop_duplicates( 5 hippo Name: animal, dtype: string - The value False for parameter ‘keep’ discards all sets of duplicated entries. + The value False for parameter 'keep' discards all sets of duplicated entries. >>> s.drop_duplicates(keep=False) 1 cow @@ -1041,7 +1041,7 @@ def duplicated(self, keep="first") -> Series: 4 True dtype: boolean - By using ‘last’, the last occurrence of each set of duplicated values + By using 'last', the last occurrence of each set of duplicated values is set on False and all others on True: >>> animals.duplicated(keep='last') @@ -1269,7 +1269,7 @@ def autocorr(self, lag: int = 1) -> float: >>> s = bpd.Series([1, 0, 0, 0]) >>> s.autocorr() - nan + np.float(nan) Args: lag (int, default 1): @@ -5494,11 +5494,11 @@ def is_monotonic_increasing(self) -> bool: >>> s = bpd.Series([1, 2, 2]) >>> s.is_monotonic_increasing - True + np.True_ >>> s = bpd.Series([3, 2, 1]) >>> s.is_monotonic_increasing - False + np.False_ Returns: bool: From 230c9de4ca5b1a5db43157b75f4680c93126e20a Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 16 Jan 2025 15:51:16 -0600 Subject: [PATCH 16/16] fix kokoro doctest --- third_party/bigframes_vendored/pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index bc05f27a6f..727e25836a 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -546,7 +546,7 @@ def to_dict(self, into: type[dict] = dict) -> Mapping: >>> dd = defaultdict(list) >>> s.to_dict(into=dd) - defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4}) + defaultdict(<class 'list'>, {np.int64(0): 1, np.int64(1): 2, np.int64(2): 3, np.int64(3): 4}) Args: into (class, default dict): @@ -1269,7 +1269,7 @@ def autocorr(self, lag: int = 1) -> float: >>> s = bpd.Series([1, 0,
0, 0]) >>> s.autocorr() - np.float(nan) + np.float64(nan) Args: lag (int, default 1):