From baa8da5bd16847c1bc4b912c469f3b55efbafadc Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 9 Oct 2024 22:18:28 +0000 Subject: [PATCH] fix: Remove index requirement from some dataframe APIs --- bigframes/bigquery/__init__.py | 2 +- bigframes/core/eval.py | 15 +++++--- bigframes/dataframe.py | 3 +- tests/system/small/test_null_index.py | 53 +++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 8 deletions(-) diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 28a818e709..847ed5eccb 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -493,8 +493,8 @@ def vector_search( ) if index_col_ids is not None: df = query._session.read_gbq(sql, index_col=index_col_ids) + df.index.names = index_labels else: df = query._session.read_gbq(sql) - df.index.names = index_labels return df diff --git a/bigframes/core/eval.py b/bigframes/core/eval.py index 692ca1c7bb..82add99258 100644 --- a/bigframes/core/eval.py +++ b/bigframes/core/eval.py @@ -38,12 +38,15 @@ def eval(df: dataframe.DataFrame, expr: str, target: Optional[dataframe.DataFram Returns: Result of evaluation. """ - index_resolver = { - vendored_pandas_eval_parsing.clean_column_name(str(name)): EvalSeries( - df.index.get_level_values(level).to_series() - ) - for level, name in enumerate(df.index.names) - } + if df._has_index: + index_resolver = { + vendored_pandas_eval_parsing.clean_column_name(str(name)): EvalSeries( + df.index.get_level_values(level).to_series() + ) + for level, name in enumerate(df.index.names) + } + else: + index_resolver = {} column_resolver = { vendored_pandas_eval_parsing.clean_column_name(str(name)): EvalSeries(series) for name, series in df.items() diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 0cfa5a2154..0c50c2c749 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -768,7 +768,7 @@ def _apply_series_binop_axis_0( reverse: bool = False, ) -> DataFrame: bf_series = bigframes.core.convert.to_bf_series( - other, self.index, self._session + other, self.index if self._has_index else None, self._session ) aligned_block, columns, expr_pairs = self._block._align_axis_0( bf_series._block, how=how @@ -3179,6 +3179,7 @@ def to_gbq( clustering_columns: Union[pandas.Index, Iterable[typing.Hashable]] = (), labels: dict[str, str] = {}, ) -> str: + index = index and self._has_index temp_table_ref = None if destination_table is None: diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py index a1e360f73d..c5be49a56b 100644 --- a/tests/system/small/test_null_index.py +++ b/tests/system/small/test_null_index.py @@ -21,6 +21,23 @@ from tests.system.utils import skip_legacy_pandas +def test_null_index_to_gbq(session, scalars_df_null_index, dataset_id_not_created): + dataset_id = dataset_id_not_created + destination_table = f"{dataset_id}.scalars_df_unindexed" + + result_table = scalars_df_null_index.to_gbq( + destination_table, clustering_columns=["int64_col"] + ) + assert ( + result_table == destination_table + if destination_table + else result_table is not None + ) + + loaded_scalars_df_index = session.read_gbq(result_table) + assert not loaded_scalars_df_index.empty + + def test_null_index_materialize(scalars_df_null_index, scalars_pandas_df_default_index): bf_result = scalars_df_null_index.to_pandas() pd.testing.assert_frame_equal( @@ -83,6 +100,23 @@ def test_null_index_aggregate(scalars_df_null_index, scalars_pandas_df_default_i ) +def test_null_index_binop_series_axis_0( + scalars_df_null_index, scalars_pandas_df_default_index +): + bf_result = ( + scalars_df_null_index[["int64_col", "int64_too"]] + .add(scalars_df_null_index["int64_col"], axis=0) + .to_pandas() + ) + pd_result = scalars_pandas_df_default_index[["int64_col", "int64_too"]].add( + scalars_pandas_df_default_index.int64_col, axis=0 + ) + + pd.testing.assert_frame_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + def test_null_index_groupby_aggregate( scalars_df_null_index, scalars_pandas_df_default_index ): @@ -139,6 +173,25 @@ def test_null_index_merge_left_null_index_object( assert got.shape == expected.shape +@skip_legacy_pandas +@pytest.mark.parametrize( + ("expr",), + [ + ("new_col = int64_col + int64_too",), + ("new_col = (rowindex > 3) | bool_col",), + ("int64_too = bool_col\nnew_col2 = rowindex",), + ], +) +def test_null_index_df_eval( + scalars_df_null_index, scalars_pandas_df_default_index, expr +): + + bf_result = scalars_df_null_index.eval(expr).to_pandas() + pd_result = scalars_pandas_df_default_index.eval(expr) + + pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) + + def test_null_index_merge_right_null_index_object( scalars_df_null_index, scalars_df_default_index, scalars_pandas_df_default_index ):