Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

feat: add iat and iloc accessing by tuples of integers #90

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions 44 bigframes/core/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,16 @@ def __getitem__(
return _iloc_getitem_series_or_dataframe(self._series, key)


class IatSeriesIndexer:
    """Scalar accessor by integer position for a Series.

    Wraps a series and forwards a validated integer key to that series'
    ``iloc`` indexer.
    """

    def __init__(self, series: bigframes.series.Series):
        self._series = series

    def __getitem__(self, key: int) -> bigframes.core.scalar.Scalar:
        """Return the value stored at integer position ``key``.

        Args:
            key: Position to look up; must be an ``int``.

        Returns:
            Whatever ``self._series.iloc[key]`` yields for that position.

        Raises:
            ValueError: If ``key`` is not an ``int``.
        """
        if isinstance(key, int):
            # Positional lookup itself is delegated to the iloc indexer.
            return self._series.iloc[key]
        raise ValueError("Series iAt based indexing can only have integer indexers")


class LocDataFrameIndexer:
def __init__(self, dataframe: bigframes.dataframe.DataFrame):
self._dataframe = dataframe
Expand Down Expand Up @@ -188,6 +198,28 @@ def __getitem__(self, key) -> Union[bigframes.dataframe.DataFrame, pd.Series]:
return _iloc_getitem_series_or_dataframe(self._dataframe, key)


class IatDataFrameIndexer:
    """Scalar accessor by integer (row, column) position for a DataFrame."""

    def __init__(self, dataframe: bigframes.dataframe.DataFrame):
        self._dataframe = dataframe

    def __getitem__(self, key: tuple) -> bigframes.core.scalar.Scalar:
        """Return the single value at ``key == (row_position, column_position)``.

        Args:
            key: A tuple of exactly two ints.

        Returns:
            The result of ``iloc[row_position]`` on the selected column.

        Raises:
            TypeError: If ``key`` is a bare int, or a tuple of ints whose
                length is not 2.
            ValueError: If ``key`` is any other non-tuple, or a tuple
                containing a non-int element.
        """
        error_message = "DataFrame.iat should be indexed by a tuple of exactly 2 ints"
        # The exception type chosen for each malformed key is meant to match
        # pandas, so the order of these checks is significant: element types
        # are validated before the tuple length.
        if isinstance(key, int):
            raise TypeError(error_message)
        if not isinstance(key, tuple) or not all(
            isinstance(part, int) for part in key
        ):
            raise ValueError(error_message)
        if len(key) != 2:
            raise TypeError(error_message)

        row_position, column_position = key
        block = self._dataframe._block
        # Narrow the block to the one requested value column, then let the
        # Series iloc indexer resolve the row.
        column_id = block.value_columns[column_position]
        single_column = bigframes.series.Series(block.select_columns([column_id]))
        return single_column.iloc[row_position]


@typing.overload
def _loc_getitem_series_or_dataframe(
series_or_dataframe: bigframes.series.Series, key
Expand Down Expand Up @@ -356,6 +388,18 @@ def _iloc_getitem_series_or_dataframe(
return result_pd_df.iloc[0]
elif isinstance(key, slice):
return series_or_dataframe._slice(key.start, key.stop, key.step)
elif isinstance(key, tuple) and len(key) == 0:
return series_or_dataframe
elif isinstance(key, tuple) and len(key) == 1:
return _iloc_getitem_series_or_dataframe(series_or_dataframe, key[0])
elif (
isinstance(key, tuple)
and isinstance(series_or_dataframe, bigframes.dataframe.DataFrame)
and len(key) == 2
):
return series_or_dataframe.iat[key]
elif isinstance(key, tuple):
raise pd.errors.IndexingError("Too many indexers")
elif pd.api.types.is_list_like(key):
if len(key) == 0:
return typing.cast(
Expand Down
4 changes: 4 additions & 0 deletions 4 bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,10 @@ def loc(self) -> indexers.LocDataFrameIndexer:
def iloc(self) -> indexers.ILocDataFrameIndexer:
return indexers.ILocDataFrameIndexer(self)

@property
def iat(self) -> indexers.IatDataFrameIndexer:
    """Return an ``IatDataFrameIndexer`` bound to this DataFrame.

    The returned object is what handles ``df.iat[...]`` subscripting.
    """
    return indexers.IatDataFrameIndexer(self)

@property
def dtypes(self) -> pandas.Series:
return pandas.Series(data=self._block.dtypes, index=self._block.column_labels)
Expand Down
4 changes: 4 additions & 0 deletions 4 bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ def loc(self) -> bigframes.core.indexers.LocSeriesIndexer:
def iloc(self) -> bigframes.core.indexers.IlocSeriesIndexer:
return bigframes.core.indexers.IlocSeriesIndexer(self)

@property
def iat(self) -> bigframes.core.indexers.IatSeriesIndexer:
    """Return an ``IatSeriesIndexer`` bound to this Series.

    The returned object is what handles ``series.iat[...]`` subscripting.
    """
    return bigframes.core.indexers.IatSeriesIndexer(self)

@property
def name(self) -> blocks.Label:
return self._name
Expand Down
55 changes: 54 additions & 1 deletion 55 tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2077,7 +2077,7 @@ def test_iloc_slice_nested(scalars_df_index, scalars_pandas_df_index):

@pytest.mark.parametrize(
"index",
[0, 5, -2],
[0, 5, -2, (2,)],
)
def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index):
bf_result = scalars_df_index.iloc[index]
Expand All @@ -2089,6 +2089,59 @@ def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index):
)


@pytest.mark.parametrize("index", [(2, 5), (5, 0), (0, 0)])
def test_iloc_tuple(scalars_df_index, scalars_pandas_df_index, index):
    """``iloc`` with a two-int tuple yields the same scalar as pandas."""
    actual = scalars_df_index.iloc[index]
    expected = scalars_pandas_df_index.iloc[index]

    assert actual == expected


@pytest.mark.parametrize(
    ("index", "error"),
    [
        # More than two indexers is rejected regardless of element type.
        ((1, 1, 1), pd.errors.IndexingError),
        (("asd", "asd", "asd"), pd.errors.IndexingError),
        # A bare string key (not a tuple).
        ("asd", TypeError),
    ],
)
def test_iloc_tuple_errors(scalars_df_index, scalars_pandas_df_index, index, error):
    """Invalid ``iloc`` keys raise the same exception type on both frames."""
    frames = (scalars_df_index, scalars_pandas_df_index)
    for frame in frames:
        with pytest.raises(error):
            frame.iloc[index]


@pytest.mark.parametrize("index", [(2, 5), (5, 0), (0, 0)])
def test_iat(scalars_df_index, scalars_pandas_df_index, index):
    """``iat`` with a (row, column) tuple yields the same scalar as pandas."""
    actual = scalars_df_index.iat[index]
    expected = scalars_pandas_df_index.iat[index]

    assert actual == expected


@pytest.mark.parametrize(
    ("index", "error"),
    [
        # Non-tuple keys: a bare int is a TypeError, anything else a ValueError.
        (0, TypeError),
        ("asd", ValueError),
        # Tuple keys: wrong length is a TypeError, non-int elements a ValueError.
        ((1, 2, 3), TypeError),
        (("asd", "asd"), ValueError),
    ],
)
def test_iat_errors(scalars_df_index, scalars_pandas_df_index, index, error):
    """Invalid ``iat`` keys raise the same exception type on both frames."""
    # pandas is exercised first so a wrong expectation surfaces against the
    # reference implementation before the frame under test.
    frames = (scalars_pandas_df_index, scalars_df_index)
    for frame in frames:
        with pytest.raises(error):
            frame.iat[index]


def test_iloc_single_integer_out_of_bound_error(
scalars_df_index, scalars_pandas_df_index
):
Expand Down
14 changes: 14 additions & 0 deletions 14 tests/system/small/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1953,6 +1953,20 @@ def test_series_iloc(scalars_df_index, scalars_pandas_df_index, start, stop, ste
)


def test_iat(scalars_df_index, scalars_pandas_df_index):
    """``Series.iat`` with an int position yields the same scalar as pandas."""
    column_name = "int64_too"
    position = 3

    actual = scalars_df_index[column_name].iat[position]
    expected = scalars_pandas_df_index[column_name].iat[position]

    assert actual == expected


def test_iat_error(scalars_df_index, scalars_pandas_df_index):
    """A non-integer ``Series.iat`` key raises ``ValueError`` on both frames."""
    bad_key = "asd"
    # pandas first, then the frame under test.
    frames = (scalars_pandas_df_index, scalars_df_index)
    for frame in frames:
        with pytest.raises(ValueError):
            frame["int64_too"].iat[bad_key]


def test_series_add_prefix(scalars_df_index, scalars_pandas_df_index):
bf_result = scalars_df_index["int64_too"].add_prefix("prefix_").to_pandas()

Expand Down
10 changes: 10 additions & 0 deletions 10 third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2097,3 +2097,13 @@ def fillna(self, value):
DataFrame: Object with missing values filled
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
def iloc(self):
    """Purely integer-location based indexing for selection by position."""
    # Placeholder body: this definition only raises, so a usable ``iloc``
    # has to come from an override elsewhere.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
def iat(self):
    """Access a single value for a row/column pair by integer position."""
    # Placeholder body: this definition only raises, so a usable ``iat``
    # has to come from an override elsewhere.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
10 changes: 10 additions & 0 deletions 10 third_party/bigframes_vendored/pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1823,3 +1823,13 @@ def map(
Series: Same index as caller.
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
def iloc(self):
    """Purely integer-location based indexing for selection by position."""
    # Placeholder body: this definition only raises, so a usable ``iloc``
    # has to come from an override elsewhere.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

@property
def iat(self):
    """Access a single value for a row/column pair by integer position."""
    # Placeholder body: this definition only raises, so a usable ``iat``
    # has to come from an override elsewhere.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.