Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

fix: Remove index requirement from some dataframe APIs #1073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 2 bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,8 +493,8 @@ def vector_search(
)
if index_col_ids is not None:
df = query._session.read_gbq(sql, index_col=index_col_ids)
df.index.names = index_labels
else:
df = query._session.read_gbq(sql)
df.index.names = index_labels

return df
15 changes: 9 additions & 6 deletions 15 bigframes/core/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,15 @@ def eval(df: dataframe.DataFrame, expr: str, target: Optional[dataframe.DataFram
Returns:
Result of evaluation.
"""
index_resolver = {
vendored_pandas_eval_parsing.clean_column_name(str(name)): EvalSeries(
df.index.get_level_values(level).to_series()
)
for level, name in enumerate(df.index.names)
}
if df._has_index:
index_resolver = {
vendored_pandas_eval_parsing.clean_column_name(str(name)): EvalSeries(
df.index.get_level_values(level).to_series()
)
for level, name in enumerate(df.index.names)
}
else:
index_resolver = {}
column_resolver = {
vendored_pandas_eval_parsing.clean_column_name(str(name)): EvalSeries(series)
for name, series in df.items()
Expand Down
3 changes: 2 additions & 1 deletion 3 bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ def _apply_series_binop_axis_0(
reverse: bool = False,
) -> DataFrame:
bf_series = bigframes.core.convert.to_bf_series(
other, self.index, self._session
other, self.index if self._has_index else None, self._session
)
aligned_block, columns, expr_pairs = self._block._align_axis_0(
bf_series._block, how=how
Expand Down Expand Up @@ -3179,6 +3179,7 @@ def to_gbq(
clustering_columns: Union[pandas.Index, Iterable[typing.Hashable]] = (),
labels: dict[str, str] = {},
) -> str:
index = index and self._has_index
temp_table_ref = None

if destination_table is None:
Expand Down
53 changes: 53 additions & 0 deletions 53 tests/system/small/test_null_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,23 @@
from tests.system.utils import skip_legacy_pandas


def test_null_index_to_gbq(session, scalars_df_null_index, dataset_id_not_created):
    """Round-trip a null-index DataFrame through ``to_gbq`` and ``read_gbq``.

    Verifies that ``to_gbq`` works on a DataFrame without an index and that
    the written table can be read back non-empty.
    """
    dataset_id = dataset_id_not_created
    destination_table = f"{dataset_id}.scalars_df_unindexed"

    result_table = scalars_df_null_index.to_gbq(
        destination_table, clustering_columns=["int64_col"]
    )
    # destination_table is always a non-empty string here, so to_gbq must
    # return exactly the table we asked for.  (The previous conditional
    # assertion's `else` branch was unreachable.)
    assert result_table == destination_table

    loaded_scalars_df_index = session.read_gbq(result_table)
    assert not loaded_scalars_df_index.empty


def test_null_index_materialize(scalars_df_null_index, scalars_pandas_df_default_index):
bf_result = scalars_df_null_index.to_pandas()
pd.testing.assert_frame_equal(
Expand Down Expand Up @@ -83,6 +100,23 @@ def test_null_index_aggregate(scalars_df_null_index, scalars_pandas_df_default_i
)


def test_null_index_binop_series_axis_0(
    scalars_df_null_index, scalars_pandas_df_default_index
):
    """Adding a Series along axis=0 matches pandas on a frame with no index."""
    cols = ["int64_col", "int64_too"]

    bf_frame = scalars_df_null_index[cols]
    bf_result = bf_frame.add(scalars_df_null_index["int64_col"], axis=0).to_pandas()

    pd_frame = scalars_pandas_df_default_index[cols]
    pd_result = pd_frame.add(scalars_pandas_df_default_index["int64_col"], axis=0)

    pd.testing.assert_frame_equal(
        bf_result, pd_result, check_dtype=False, check_index_type=False
    )


def test_null_index_groupby_aggregate(
scalars_df_null_index, scalars_pandas_df_default_index
):
Expand Down Expand Up @@ -139,6 +173,25 @@ def test_null_index_merge_left_null_index_object(
assert got.shape == expected.shape


@skip_legacy_pandas
@pytest.mark.parametrize(
    ("expr",),
    [
        ("new_col = int64_col + int64_too",),
        ("new_col = (rowindex > 3) | bool_col",),
        ("int64_too = bool_col\nnew_col2 = rowindex",),
    ],
)
def test_null_index_df_eval(
    scalars_df_null_index, scalars_pandas_df_default_index, expr
):
    """DataFrame.eval on a null-index frame matches pandas for each expression."""
    got = scalars_df_null_index.eval(expr).to_pandas()
    want = scalars_pandas_df_default_index.eval(expr)

    pd.testing.assert_frame_equal(got, want, check_index_type=False)


def test_null_index_merge_right_null_index_object(
scalars_df_null_index, scalars_df_default_index, scalars_pandas_df_default_index
):
Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.