diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 45dbcdc78d..40f12671ae 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -304,6 +304,9 @@ def __len__(self): rows, _ = self.shape return rows + def __iter__(self): + return iter(self.columns) + def astype( self, dtype: Union[bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype], @@ -1477,12 +1480,27 @@ def isin(self, values) -> DataFrame: f"isin(), you passed a [{type(values).__name__}]" ) + def keys(self) -> pandas.Index: + return self.columns + def items(self): column_ids = self._block.value_columns column_labels = self._block.column_labels for col_id, col_label in zip(column_ids, column_labels): yield col_label, bigframes.series.Series(self._block.select_column(col_id)) + def iterrows(self) -> Iterable[tuple[typing.Any, pandas.Series]]: + for df in self.to_pandas_batches(): + for item in df.iterrows(): + yield item + + def itertuples( + self, index: bool = True, name: typing.Optional[str] = "Pandas" + ) -> Iterable[tuple[typing.Any, ...]]: + for df in self.to_pandas_batches(): + for item in df.itertuples(index=index, name=name): + yield item + def dropna( self, *, diff --git a/bigframes/series.py b/bigframes/series.py index 824757cf52..032bdf6c42 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -16,6 +16,7 @@ from __future__ import annotations +import itertools import numbers import textwrap import typing @@ -148,6 +149,12 @@ def _set_internal_query_job(self, query_job: bigquery.QueryJob): def __len__(self): return self.shape[0] + def __iter__(self) -> typing.Iterator: + # Yield the values (not the index labels) of each batch, matching pandas Series iteration. + return itertools.chain.from_iterable( + map(lambda x: x.squeeze(axis=1), self._block.to_pandas_batches()) + ) + def copy(self) -> Series: return Series(self._block) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 2b710d692a..bd5930e508 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -803,6 +803,55 @@ def
test_apply_series_scalar_callable( pandas.testing.assert_series_equal(bf_result, pd_result) +def test_df_keys( + scalars_df_index, + scalars_pandas_df_index, +): + pandas.testing.assert_index_equal( + scalars_df_index.keys(), scalars_pandas_df_index.keys() + ) + + +def test_df_iter( + scalars_df_index, + scalars_pandas_df_index, +): + for bf_i, df_i in zip(scalars_df_index, scalars_pandas_df_index): + assert bf_i == df_i + + +def test_iterrows( + scalars_df_index, + scalars_pandas_df_index, +): + for (bf_index, bf_series), (pd_index, pd_series) in zip( + scalars_df_index.iterrows(), scalars_pandas_df_index.iterrows() + ): + assert bf_index == pd_index + pandas.testing.assert_series_equal(bf_series, pd_series) + + +@pytest.mark.parametrize( + ( + "index", + "name", + ), + [ + ( + True, + "my_df", + ), + (False, None), + ], +) +def test_itertuples(scalars_df_index, index, name): + # Numeric has slightly different representation as a result of conversions. + bf_tuples = scalars_df_index.itertuples(index, name) + pd_tuples = scalars_df_index.to_pandas().itertuples(index, name) + for bf_tuple, pd_tuple in zip(bf_tuples, pd_tuples): + assert bf_tuple == pd_tuple + + def test_df_isin_list(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs values = ["Hello, World!", 55555, 2.51, pd.NA, True] diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index e267fac0f7..6f4f6be35d 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -975,6 +975,85 @@ def isin(self, values): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def keys(self): + """ + Get the 'info axis'. + + This is index for Series, columns for DataFrame. + + Returns: + Index: Info axis. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({ + ... 'A': [1, 2, 3], + ...
'B': [4, 5, 6], + ... }) + >>> df.keys() + Index(['A', 'B'], dtype='object') + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def iterrows(self): + """ + Iterate over DataFrame rows as (index, Series) pairs. + + Yields: + a tuple (index, data) where data contains row values as a Series + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> df = bpd.DataFrame({ + ... 'A': [1, 2, 3], + ... 'B': [4, 5, 6], + ... }) + >>> index, row = next(df.iterrows()) + >>> index + 0 + >>> row + A 1 + B 4 + Name: 0, dtype: object + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def itertuples(self, index: bool = True, name: str | None = "Pandas"): + """ + Iterate over DataFrame rows as namedtuples. + + Args: + index (bool, default True): + If True, return the index as the first element of the tuple. + name (str or None, default "Pandas"): + The name of the returned namedtuples or None to return regular + tuples. + + Returns: + iterator: + An object to iterate over namedtuples for each row in the + DataFrame with the first field possibly being the index and + following fields being the column values. + + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> df = bpd.DataFrame({ + ... 'A': [1, 2, 3], + ... 'B': [4, 5, 6], + ... }) + >>> next(df.itertuples(name="Pair")) + Pair(Index=0, A=1, B=4) + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def items(self): """ Iterate over (column name, Series) pairs.
diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index 27d2e84537..127efe6a3d 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -1,7 +1,7 @@ # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/generic.py from __future__ import annotations -from typing import Literal, Optional +from typing import Iterator, Literal, Optional from bigframes import constants from third_party.bigframes_vendored.pandas.core import indexing @@ -35,6 +35,35 @@ def size(self) -> int: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def __iter__(self) -> Iterator: + """ + Iterate over the column labels of a DataFrame or the values of a Series. + + Returns: + iterator: Column labels for a DataFrame, values for a Series. + + **Examples:** + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({ + ... 'A': [1, 2, 3], + ... 'B': [4, 5, 6], + ... }) + >>> for x in df: + ... print(x) + A + B + + >>> series = bpd.Series(["a", "b", "c"], index=[10, 20, 30]) + >>> for x in series: + ... print(x) + a + b + c + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + # ------------------------------------------------------------------------- # Unary Methods