diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py
index 1a88b2abd6..a74880041c 100644
--- a/bigframes/core/indexers.py
+++ b/bigframes/core/indexers.py
@@ -97,6 +97,20 @@ def __getitem__(
         return _iloc_getitem_series_or_dataframe(self._series, key)
 
 
+class IatSeriesIndexer:
+    """Scalar access to a Series by integer position (``Series.iat``)."""
+
+    def __init__(self, series: bigframes.series.Series):
+        self._series = series
+
+    def __getitem__(self, key: int) -> bigframes.core.scalar.Scalar:
+        """Return the value at position ``key``; raise ValueError for non-int keys."""
+        # bool is a subclass of int, but pandas rejects it here, so we do too
+        if isinstance(key, bool) or not isinstance(key, int):
+            raise ValueError("Series iAt based indexing can only have integer indexers")
+        return self._series.iloc[key]
+
+
 class LocDataFrameIndexer:
     def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         self._dataframe = dataframe
@@ -188,6 +202,35 @@ def __getitem__(self, key) -> Union[bigframes.dataframe.DataFrame, pd.Series]:
         return _iloc_getitem_series_or_dataframe(self._dataframe, key)
 
 
+class IatDataFrameIndexer:
+    """Scalar access to a DataFrame by integer (row, column) position."""
+
+    def __init__(self, dataframe: bigframes.dataframe.DataFrame):
+        self._dataframe = dataframe
+
+    def __getitem__(self, key: tuple) -> bigframes.core.scalar.Scalar:
+        """Return the value at ``key == (row, column)``, both integer positions."""
+        error_message = "DataFrame.iat should be indexed by a tuple of exactly 2 ints"
+        # we raise TypeError or ValueError under the same conditions that pandas does;
+        # in particular, bool is rejected even though it is a subclass of int
+        if isinstance(key, int) and not isinstance(key, bool):
+            raise TypeError(error_message)
+        if not isinstance(key, tuple):
+            raise ValueError(error_message)
+        key_values_are_ints = [
+            isinstance(key_value, int) and not isinstance(key_value, bool)
+            for key_value in key
+        ]
+        if not all(key_values_are_ints):
+            raise ValueError(error_message)
+        if len(key) != 2:
+            raise TypeError(error_message)
+        block = self._dataframe._block
+        column_block = block.select_columns([block.value_columns[key[1]]])
+        column = bigframes.series.Series(column_block)
+        return column.iloc[key[0]]
+
+
 @typing.overload
 def _loc_getitem_series_or_dataframe(
     series_or_dataframe: bigframes.series.Series, key
@@ -356,6 +399,19 @@ def _iloc_getitem_series_or_dataframe(
         return result_pd_df.iloc[0]
     elif isinstance(key, slice):
         return series_or_dataframe._slice(key.start, key.stop, key.step)
+    elif isinstance(key, tuple) and len(key) == 0:
+        return series_or_dataframe
+    elif isinstance(key, tuple) and len(key) == 1:
+        return _iloc_getitem_series_or_dataframe(series_or_dataframe, key[0])
+    elif (
+        isinstance(key, tuple)
+        and isinstance(series_or_dataframe, bigframes.dataframe.DataFrame)
+        and len(key) == 2
+    ):
+        # 2-tuples are treated as scalar (row, column) lookups; iat validates the ints
+        return series_or_dataframe.iat[key]
+    elif isinstance(key, tuple):
+        raise pd.errors.IndexingError("Too many indexers")
     elif pd.api.types.is_list_like(key):
         if len(key) == 0:
             return typing.cast(
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 87d0f21b62..d5b2fa86e9 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -254,6 +254,10 @@ def loc(self) -> indexers.LocDataFrameIndexer:
     def iloc(self) -> indexers.ILocDataFrameIndexer:
         return indexers.ILocDataFrameIndexer(self)
 
+    @property
+    def iat(self) -> indexers.IatDataFrameIndexer:
+        return indexers.IatDataFrameIndexer(self)
+
     @property
     def dtypes(self) -> pandas.Series:
         return pandas.Series(data=self._block.dtypes, index=self._block.column_labels)
diff --git a/bigframes/series.py b/bigframes/series.py
index e5afe91e44..56e1b43a03 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -87,6 +87,10 @@ def loc(self) -> bigframes.core.indexers.LocSeriesIndexer:
     def iloc(self) -> bigframes.core.indexers.IlocSeriesIndexer:
         return bigframes.core.indexers.IlocSeriesIndexer(self)
 
+    @property
+    def iat(self) -> bigframes.core.indexers.IatSeriesIndexer:
+        return bigframes.core.indexers.IatSeriesIndexer(self)
+
     @property
     def name(self) -> blocks.Label:
         return self._name
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 711da10c55..0262a31a92 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -2077,7 +2077,7 @@ def test_iloc_slice_nested(scalars_df_index, scalars_pandas_df_index):
 
 @pytest.mark.parametrize(
     "index",
-    [0, 5, -2],
+    [0, 5, -2, (2,)],
 )
 def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index):
     bf_result = scalars_df_index.iloc[index]
@@ -2089,6 +2089,61 @@ def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index):
     )
 
 
+@pytest.mark.parametrize(
+    "index",
+    [(2, 5), (5, 0), (0, 0)],
+)
+def test_iloc_tuple(scalars_df_index, scalars_pandas_df_index, index):
+    bf_result = scalars_df_index.iloc[index]
+    pd_result = scalars_pandas_df_index.iloc[index]
+
+    assert bf_result == pd_result
+
+
+@pytest.mark.parametrize(
+    ("index", "error"),
+    [
+        ((1, 1, 1), pd.errors.IndexingError),
+        (("asd", "asd", "asd"), pd.errors.IndexingError),
+        (("asd"), TypeError),
+    ],
+)
+def test_iloc_tuple_errors(scalars_df_index, scalars_pandas_df_index, index, error):
+    with pytest.raises(error):
+        scalars_df_index.iloc[index]
+    with pytest.raises(error):
+        scalars_pandas_df_index.iloc[index]
+
+
+@pytest.mark.parametrize(
+    "index",
+    [(2, 5), (5, 0), (0, 0)],
+)
+def test_iat(scalars_df_index, scalars_pandas_df_index, index):
+    bf_result = scalars_df_index.iat[index]
+    pd_result = scalars_pandas_df_index.iat[index]
+
+    assert bf_result == pd_result
+
+
+@pytest.mark.parametrize(
+    ("index", "error"),
+    [
+        (0, TypeError),
+        ("asd", ValueError),
+        ((1, 2, 3), TypeError),
+        (("asd", "asd"), ValueError),
+        (True, ValueError),
+        ((True, False), ValueError),
+    ],
+)
+def test_iat_errors(scalars_df_index, scalars_pandas_df_index, index, error):
+    with pytest.raises(error):
+        scalars_pandas_df_index.iat[index]
+    with pytest.raises(error):
+        scalars_df_index.iat[index]
+
+
 def test_iloc_single_integer_out_of_bound_error(
     scalars_df_index, scalars_pandas_df_index
 ):
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 993df18c95..802425510a 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -1953,6 +1953,24 @@ def test_series_iloc(scalars_df_index, scalars_pandas_df_index, start, stop, ste
     )
 
 
+def test_iat(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index["int64_too"].iat[3]
+    pd_result = scalars_pandas_df_index["int64_too"].iat[3]
+
+    assert bf_result == pd_result
+
+
+def test_iat_error(scalars_df_index, scalars_pandas_df_index):
+    with pytest.raises(ValueError):
+        scalars_pandas_df_index["int64_too"].iat["asd"]
+    with pytest.raises(ValueError):
+        scalars_df_index["int64_too"].iat["asd"]
+    with pytest.raises(ValueError):
+        scalars_pandas_df_index["int64_too"].iat[True]
+    with pytest.raises(ValueError):
+        scalars_df_index["int64_too"].iat[True]
+
+
 def test_series_add_prefix(scalars_df_index, scalars_pandas_df_index):
     bf_result = scalars_df_index["int64_too"].add_prefix("prefix_").to_pandas()
 
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 644e043e83..80a5428b36 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -2097,3 +2097,13 @@ def fillna(self, value):
             DataFrame: Object with missing values filled
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    @property
+    def iloc(self):
+        """Purely integer-location based indexing for selection by position."""
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    @property
+    def iat(self):
+        """Access a single value for a row/column pair by integer position."""
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 970ab1d8b4..03729922d5 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -1823,3 +1823,13 @@ def map(
             Series: Same index as caller.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    @property
+    def iloc(self):
+        """Purely integer-location based indexing for selection by position."""
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    @property
+    def iat(self):
+        """Access a single value for a row/column pair by integer position."""
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)