Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

feat: show possible correct key(s) in .__getitem__ KeyError message #1097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion 16 bigframes/core/groupby/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import bigframes_vendored.constants as constants
import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby
import jellyfish
import pandas as pd

from bigframes.core import log_adapter
Expand Down Expand Up @@ -91,8 +92,21 @@ def __getitem__(

bad_keys = [key for key in keys if key not in self._block.column_labels]

# Raise a KeyError message with the possible correct key(s)
if len(bad_keys) > 0:
raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
possible_key = []
for bad_key in bad_keys:
possible_key.append(
min(
self._block.column_labels,
key=lambda item: jellyfish.damerau_levenshtein_distance(
bad_key, item
),
)
)
raise KeyError(
f"Columns not found: {str(bad_keys)[1:-1]}. Did you mean {str(possible_key)[1:-1]}?"
)

columns = [
col_id for col_id, label in self._col_id_labels.items() if label in keys
Expand Down
37 changes: 21 additions & 16 deletions 37 tests/system/small/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,24 +426,12 @@ def test_dataframe_groupby_getitem_error(
scalars_pandas_df_index,
):
col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
with pytest.raises(KeyError, match="\"Columns not found: 'not_in_group'\""):
(
scalars_df_index[col_names]
.groupby("string_col")["not_in_group"]
.min()
.to_pandas()
)


def test_dataframe_groupby_getitem_multiple_columns_error(
scalars_df_index,
scalars_pandas_df_index,
):
col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
with pytest.raises(KeyError, match="\"Columns not found: 'col1', 'col2'\""):
with pytest.raises(
KeyError, match=r"Columns not found: 'not_in_group'. Did you mean 'string_col'?"
):
(
scalars_df_index[col_names]
.groupby("string_col")["col1", "col2"]
.groupby("bool_col")["not_in_group"]
.min()
.to_pandas()
)
Expand All @@ -464,6 +452,23 @@ def test_dataframe_groupby_getitem_list(
pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False)


def test_dataframe_groupby_getitem_list_error(
    scalars_df_index,
    scalars_pandas_df_index,
):
    """Check the KeyError raised when several selected groupby columns are missing.

    Groups the four-column frame by ``string_col`` and selects the
    non-existent columns ``col1`` and ``float``. The error message is expected
    to list both missing names and to suggest ``bool_col`` and
    ``float64_col`` as the intended columns.

    ``scalars_pandas_df_index`` is requested as a fixture but is not read in
    the body of this test.
    """
    col_names = ["float64_col", "int64_col", "bool_col", "string_col"]
    # ``match`` is treated as a regular expression, so the literal "." and the
    # trailing "?" are escaped; unescaped, "'?" would make the closing quote
    # optional and "." would match any character, weakening the assertion.
    with pytest.raises(
        KeyError,
        match=r"Columns not found: 'col1', 'float'\. Did you mean 'bool_col', 'float64_col'\?",
    ):
        (
            scalars_df_index[col_names]
            .groupby("string_col")["col1", "float"]
            .min()
            .to_pandas()
        )


def test_dataframe_groupby_nonnumeric_with_mean():
df = pd.DataFrame(
{
Expand Down
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.