diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py index b2f018e80a..14d8e8501c 100644 --- a/bigframes/core/compile/polars/compiler.py +++ b/bigframes/core/compile/polars/compiler.py @@ -92,6 +92,8 @@ def _( return args[0] < args[1] if isinstance(op, ops.eq_op.__class__): return args[0] == args[1] + if isinstance(op, ops.ne_op.__class__): + return args[0] != args[1] if isinstance(op, ops.mod_op.__class__): return args[0] % args[1] if isinstance(op, ops.coalesce_op.__class__): @@ -101,6 +103,9 @@ def _( for pred, result in zip(args[2::2], args[3::2]): return expr.when(pred).then(result) return expr + if isinstance(op, ops.where_op.__class__): + original, condition, otherwise = args + return pl.when(condition).then(original).otherwise(otherwise) raise NotImplementedError(f"Polars compiler hasn't implemented {op}") @dataclasses.dataclass(frozen=True) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 6da68e2e8f..44b1d9d4fa 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -145,12 +145,7 @@ def names(self) -> typing.Sequence[blocks.Label]: @names.setter def names(self, values: typing.Sequence[blocks.Label]): - new_block = self._block.with_index_labels(values) - if self._linked_frame is not None: - self._linked_frame._set_block( - self._linked_frame._block.with_index_labels(values) - ) - self._block = new_block + self.rename(values, inplace=True) @property def nlevels(self) -> int: @@ -411,11 +406,62 @@ def fillna(self, value=None) -> Index: ops.fillna_op.as_expr(ex.free_var("arg"), ex.const(value)) ) - def rename(self, name: Union[str, Sequence[str]]) -> Index: - names = [name] if isinstance(name, str) else list(name) + @overload + def rename( + self, + name: Union[blocks.Label, Sequence[blocks.Label]], + ) -> Index: + ... + + @overload + def rename( + self, + name: Union[blocks.Label, Sequence[blocks.Label]], + *, + inplace: Literal[False], + ) -> Index: + ... + + @overload + def rename( + self, + name: Union[blocks.Label, Sequence[blocks.Label]], + *, + inplace: Literal[True], + ) -> None: + ... + + def rename( + self, + name: Union[blocks.Label, Sequence[blocks.Label]], + *, + inplace: bool = False, + ) -> Optional[Index]: + # Tuples are allowed as a label, but we specifically exclude them here. + # This is because tuples are hashable, but we want to treat them as a + # sequence. If name is iterable, we want to assume we're working with a + # MultiIndex. Unfortunately, strings are iterable and we don't want a + # list of all the characters, so specifically exclude the non-tuple + # hashables. + if isinstance(name, blocks.Label) and not isinstance(name, tuple): + names = [name] + else: + names = list(name) + if len(names) != self.nlevels: raise ValueError("'name' must be same length as levels") - return Index(self._block.with_index_labels(names)) + + new_block = self._block.with_index_labels(names) + + if inplace: + if self._linked_frame is not None: + self._linked_frame._set_block( + self._linked_frame._block.with_index_labels(names) + ) + self._block = new_block + return None + else: + return Index(new_block) def drop( self, diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index be940a1e82..1d0d485392 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2082,15 +2082,67 @@ def reorder_levels(self, order: LevelsType, axis: int | str = 0): def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) + @overload def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: + ... + + @overload + def rename( + self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] + ) -> DataFrame: + ... + + @overload + def rename( + self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] + ) -> None: + ... + + def rename( + self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False + ) -> Optional[DataFrame]: block = self._block.rename(columns=columns) - return DataFrame(block) + if inplace: + self._block = block + return None + else: + return DataFrame(block) + + @overload + def rename_axis( + self, + mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + ) -> DataFrame: + ... + + @overload def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + *, + inplace: Literal[False], **kwargs, ) -> DataFrame: + ... + + @overload + def rename_axis( + self, + mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + *, + inplace: Literal[True], + **kwargs, + ) -> None: + ... + + def rename_axis( + self, + mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + *, + inplace: bool = False, + **kwargs, + ) -> Optional[DataFrame]: if len(kwargs) != 0: raise NotImplementedError( f"rename_axis does not currently support any keyword arguments. {constants.FEEDBACK_LINK}" @@ -2100,7 +2152,14 @@ def rename_axis( labels = mapper else: labels = [mapper] - return DataFrame(self._block.with_index_labels(labels)) + + block = self._block.with_index_labels(labels) + + if inplace: + self._block = block + return None + else: + return DataFrame(block) @validations.requires_ordering() def equals(self, other: typing.Union[bigframes.series.Series, DataFrame]) -> bool: diff --git a/bigframes/series.py b/bigframes/series.py index 866f4d0a5d..74e8d03c8d 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -31,6 +31,7 @@ Literal, Mapping, Optional, + overload, Sequence, Tuple, Union, @@ -95,6 +96,10 @@ class Series(bigframes.operations.base.SeriesMethods, vendored_pandas_series.Ser # Must be above 5000 for pandas to delegate to bigframes for binops __pandas_priority__ = 13000 + # Ensure mypy can more robustly determine the type of self._block since it + # gets set in various places. + _block: blocks.Block + def __init__(self, *args, **kwargs): self._query_job: Optional[bigquery.QueryJob] = None super().__init__(*args, **kwargs) @@ -254,22 +259,45 @@ def __iter__(self) -> typing.Iterator: def copy(self) -> Series: return Series(self._block) + @overload def rename( - self, index: Union[blocks.Label, Mapping[Any, Any]] = None, **kwargs + self, + index: Union[blocks.Label, Mapping[Any, Any]] = None, + ) -> Series: + ... + + @overload + def rename( + self, + index: Union[blocks.Label, Mapping[Any, Any]] = None, + *, + inplace: Literal[False], + **kwargs, ) -> Series: + ... + + @overload + def rename( + self, + index: Union[blocks.Label, Mapping[Any, Any]] = None, + *, + inplace: Literal[True], + **kwargs, + ) -> None: + ... + + def rename( + self, + index: Union[blocks.Label, Mapping[Any, Any]] = None, + *, + inplace: bool = False, + **kwargs, + ) -> Optional[Series]: if len(kwargs) != 0: raise NotImplementedError( f"rename does not currently support any keyword arguments. {constants.FEEDBACK_LINK}" ) - # rename the Series name - if index is None or isinstance( - index, str - ): # Python 3.9 doesn't allow isinstance of Optional - index = typing.cast(Optional[str], index) - block = self._block.with_column_labels([index]) - return Series(block) - # rename the index if isinstance(index, Mapping): index = typing.cast(Mapping[Any, Any], index) @@ -294,22 +322,61 @@ def rename( block = block.set_index(new_idx_ids, index_labels=block.index.names) - return Series(block) + if inplace: + self._block = block + return None + else: + return Series(block) # rename the Series name if isinstance(index, typing.Hashable): + # Python 3.9 doesn't allow isinstance of Optional index = typing.cast(Optional[str], index) block = self._block.with_column_labels([index]) - return Series(block) + + if inplace: + self._block = block + return None + else: + return Series(block) raise ValueError(f"Unsupported type of parameter index: {type(index)}") - @validations.requires_index + @overload + def rename_axis( + self, + mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + ) -> Series: + ... + + @overload def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + *, + inplace: Literal[False], **kwargs, ) -> Series: + ... + + @overload + def rename_axis( + self, + mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + *, + inplace: Literal[True], + **kwargs, + ) -> None: + ... + + @validations.requires_index + def rename_axis( + self, + mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], + *, + inplace: bool = False, + **kwargs, + ) -> Optional[Series]: if len(kwargs) != 0: raise NotImplementedError( f"rename_axis does not currently support any keyword arguments. {constants.FEEDBACK_LINK}" @@ -319,7 +386,13 @@ def rename_axis( labels = mapper else: labels = [mapper] - return Series(self._block.with_index_labels(labels)) + + block = self._block.with_index_labels(labels) + if inplace: + self._block = block + return None + else: + return Series(block) def equals( self, other: typing.Union[Series, bigframes.dataframe.DataFrame] diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index 528835f6da..ca6fa57d0b 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -14,7 +14,7 @@ import copy import datetime -from typing import Optional, Sequence +from typing import Any, Dict, Optional, Sequence import unittest.mock as mock import google.auth.credentials @@ -23,12 +23,9 @@ import bigframes import bigframes.clients -import bigframes.core.ordering +import bigframes.core.global_session import bigframes.dataframe -import bigframes.series import bigframes.session.clients -import bigframes.session.executor -import bigframes.session.metrics """Utilities for creating test resources.""" @@ -129,7 +126,10 @@ def query_and_wait_mock(query, *args, job_config=None, **kwargs): def create_dataframe( - monkeypatch: pytest.MonkeyPatch, *, session: Optional[bigframes.Session] = None + monkeypatch: pytest.MonkeyPatch, + *, + session: Optional[bigframes.Session] = None, + data: Optional[Dict[str, Sequence[Any]]] = None, ) -> bigframes.dataframe.DataFrame: """[Experimental] Create a mock DataFrame that avoids making Google Cloud API calls. @@ -138,8 +138,11 @@ def create_dataframe( if session is None: session = create_bigquery_session() + if data is None: + data = {"col": []} + # Since this may create a ReadLocalNode, the session we explicitly pass in # might not actually be used. Mock out the global session, too. monkeypatch.setattr(bigframes.core.global_session, "_global_session", session) bigframes.options.bigquery._session_started = True - return bigframes.dataframe.DataFrame({"col": []}, session=session) + return bigframes.dataframe.DataFrame(data, session=session) diff --git a/bigframes/testing/polars_session.py b/bigframes/testing/polars_session.py index d592b49038..f8dda8da55 100644 --- a/bigframes/testing/polars_session.py +++ b/bigframes/testing/polars_session.py @@ -16,6 +16,7 @@ from typing import Optional, Union import weakref +import pandas import polars import bigframes @@ -87,5 +88,7 @@ def __init__(self): def read_pandas(self, pandas_dataframe, write_engine="default"): # override read_pandas to always keep data local-only + if isinstance(pandas_dataframe, pandas.Series): + pandas_dataframe = pandas_dataframe.to_frame() local_block = bigframes.core.blocks.Block.from_local(pandas_dataframe, self) return bigframes.dataframe.DataFrame(local_block) diff --git a/tests/unit/test_dataframe.py b/tests/unit/test_dataframe.py index 9d67fd33b7..d630380e7a 100644 --- a/tests/unit/test_dataframe.py +++ b/tests/unit/test_dataframe.py @@ -90,6 +90,45 @@ def test_dataframe_to_gbq_writes_to_anonymous_dataset( assert destination.startswith(anonymous_dataset_id) +def test_dataframe_rename_columns(monkeypatch: pytest.MonkeyPatch): + dataframe = mocks.create_dataframe( + monkeypatch, data={"col1": [], "col2": [], "col3": []} + ) + assert dataframe.columns.to_list() == ["col1", "col2", "col3"] + renamed = dataframe.rename(columns={"col1": "a", "col2": "b", "col3": "c"}) + assert renamed.columns.to_list() == ["a", "b", "c"] + + +def test_dataframe_rename_columns_inplace_returns_none(monkeypatch: pytest.MonkeyPatch): + dataframe = mocks.create_dataframe( + monkeypatch, data={"col1": [], "col2": [], "col3": []} + ) + assert dataframe.columns.to_list() == ["col1", "col2", "col3"] + assert ( + dataframe.rename(columns={"col1": "a", "col2": "b", "col3": "c"}, inplace=True) + is None + ) + assert dataframe.columns.to_list() == ["a", "b", "c"] + + +def test_dataframe_rename_axis(monkeypatch: pytest.MonkeyPatch): + dataframe = mocks.create_dataframe( + monkeypatch, data={"index1": [], "index2": [], "col1": [], "col2": []} + ).set_index(["index1", "index2"]) + assert list(dataframe.index.names) == ["index1", "index2"] + renamed = dataframe.rename_axis(["a", "b"]) + assert list(renamed.index.names) == ["a", "b"] + + +def test_dataframe_rename_axis_inplace_returns_none(monkeypatch: pytest.MonkeyPatch): + dataframe = mocks.create_dataframe( + monkeypatch, data={"index1": [], "index2": [], "col1": [], "col2": []} + ).set_index(["index1", "index2"]) + assert list(dataframe.index.names) == ["index1", "index2"] + assert dataframe.rename_axis(["a", "b"], inplace=True) is None + assert list(dataframe.index.names) == ["a", "b"] + + def test_dataframe_semantics_property_future_warning( monkeypatch: pytest.MonkeyPatch, ): diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py new file mode 100644 index 0000000000..97f1e4419e --- /dev/null +++ b/tests/unit/test_index.py @@ -0,0 +1,40 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from bigframes.testing import mocks + + +def test_index_rename(monkeypatch: pytest.MonkeyPatch): + dataframe = mocks.create_dataframe( + monkeypatch, data={"idx": [], "col": []} + ).set_index("idx") + index = dataframe.index + assert index.name == "idx" + renamed = index.rename("my_index_name") + assert renamed.name == "my_index_name" + + +def test_index_rename_inplace_returns_none(monkeypatch: pytest.MonkeyPatch): + dataframe = mocks.create_dataframe( + monkeypatch, data={"idx": [], "col": []} + ).set_index("idx") + index = dataframe.index + assert index.name == "idx" + assert index.rename("my_index_name", inplace=True) is None + + # Make sure the linked DataFrame is updated, too. + assert dataframe.index.name == "my_index_name" + assert index.name == "my_index_name" diff --git a/tests/unit/test_local_engine.py b/tests/unit/test_local_engine.py index e36dc3df3c..509bc6ade2 100644 --- a/tests/unit/test_local_engine.py +++ b/tests/unit/test_local_engine.py @@ -79,6 +79,33 @@ def test_polars_local_engine_filter(small_inline_frame: pd.DataFrame, polars_ses pandas.testing.assert_frame_equal(bf_result, pd_result) +def test_polars_local_engine_series_rename_with_mapping(polars_session): + pd_series = pd.Series( + ["a", "b", "c"], index=[1, 2, 3], dtype="string[pyarrow]", name="test_name" + ) + bf_series = bpd.Series(pd_series, session=polars_session) + + bf_result = bf_series.rename({1: 100, 2: 200, 3: 300}).to_pandas() + pd_result = pd_series.rename({1: 100, 2: 200, 3: 300}) + # pd default index is int64, bf is Int64 + pandas.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + + +def test_polars_local_engine_series_rename_with_mapping_inplace(polars_session): + pd_series = pd.Series( + ["a", "b", "c"], index=[1, 2, 3], dtype="string[pyarrow]", name="test_name" + ) + bf_series = bpd.Series(pd_series, session=polars_session) + + pd_series.rename({1: 100, 2: 200, 3: 300}, inplace=True) + assert bf_series.rename({1: 100, 2: 200, 3: 300}, inplace=True) is None + + bf_result = bf_series.to_pandas() + pd_result = pd_series + # pd default index is int64, bf is Int64 + pandas.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + + def test_polars_local_engine_reset_index( small_inline_frame: pd.DataFrame, polars_session ): diff --git a/tests/unit/test_series.py b/tests/unit/test_series.py index 1409209c6c..8a083d7e4a 100644 --- a/tests/unit/test_series.py +++ b/tests/unit/test_series.py @@ -12,7 +12,44 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import cast + +import pytest + import bigframes.series +from bigframes.testing import mocks + + +def test_series_rename(monkeypatch: pytest.MonkeyPatch): + series = cast(bigframes.series.Series, mocks.create_dataframe(monkeypatch)["col"]) + assert series.name == "col" + renamed = series.rename("renamed_col") + assert renamed.name == "renamed_col" + + +def test_series_rename_inplace_returns_none(monkeypatch: pytest.MonkeyPatch): + series = cast(bigframes.series.Series, mocks.create_dataframe(monkeypatch)["col"]) + assert series.name == "col" + assert series.rename("renamed_col", inplace=True) is None + assert series.name == "renamed_col" + + +def test_series_rename_axis(monkeypatch: pytest.MonkeyPatch): + series = mocks.create_dataframe( + monkeypatch, data={"index1": [], "index2": [], "col1": [], "col2": []} + ).set_index(["index1", "index2"])["col1"] + assert list(series.index.names) == ["index1", "index2"] + renamed = series.rename_axis(["a", "b"]) + assert list(renamed.index.names) == ["a", "b"] + + +def test_series_rename_axis_inplace_returns_none(monkeypatch: pytest.MonkeyPatch): + series = mocks.create_dataframe( + monkeypatch, data={"index1": [], "index2": [], "col1": [], "col2": []} + ).set_index(["index1", "index2"])["col1"] + assert list(series.index.names) == ["index1", "index2"] + assert series.rename_axis(["a", "b"], inplace=True) is None + assert list(series.index.names) == ["a", "b"] def test_series_repr_with_uninitialized_object(): diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 63142e4dd8..c1b5b5a86b 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -11,7 +11,7 @@ """ from __future__ import annotations -from typing import Hashable, Iterable, Literal, Mapping, Optional, Sequence, Union +from typing import Hashable, Iterable, Literal, Optional, Sequence, Union from bigframes_vendored import constants import bigframes_vendored.pandas.core.generic as generic @@ -1392,8 +1392,9 @@ def align( def rename( self, *, - columns: Mapping, - ) -> DataFrame: + columns, + inplace, + ): """Rename columns. Dict values must be unique (1-to-1). Labels not contained in a dict @@ -1426,16 +1427,20 @@ def rename( Args: columns (Mapping): Dict-like from old column labels to new column labels. + inplace (bool): + Default False. Whether to modify the DataFrame rather than + creating a new one. Returns: - bigframes.pandas.DataFrame: DataFrame with the renamed axis labels. + bigframes.pandas.DataFrame | None: + DataFrame with the renamed axis labels or None if ``inplace=True``. Raises: KeyError: If any of the labels is not found. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def rename_axis(self, mapper: Optional[str], **kwargs) -> DataFrame: + def rename_axis(self, mapper, *, inplace, **kwargs): """ Set the name of the axis for the index. @@ -1443,11 +1448,15 @@ def rename_axis(self, mapper: Optional[str], **kwargs) -> DataFrame: Currently only accepts a single string parameter (the new name of the index). Args: - mapper str: + mapper (str): Value to set the axis name attribute. + inplace (bool): + Default False. Modifies the object directly, instead of + creating a new Series or DataFrame. Returns: - bigframes.pandas.DataFrame: DataFrame with the new index name + bigframes.pandas.DataFrame | None: + DataFrame with the new index name or None if ``inplace=True``. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py b/third_party/bigframes_vendored/pandas/core/indexes/base.py index be1c5034f9..7df1c7a9de 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -941,7 +941,7 @@ def fillna(self, value) -> Index: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def rename(self, name) -> Index: + def rename(self, name, *, inplace): """ Alter Index or MultiIndex name. @@ -960,10 +960,13 @@ def rename(self, name) -> Index: Args: name (label or list of labels): Name(s) to set. + inplace (bool): + Default False. Modifies the object directly, instead of + creating a new Index or MultiIndex. Returns: - bigframes.pandas.Index: - The same type as the caller. + bigframes.pandas.Index | None: + The same type as the caller or None if ``inplace=True``. Raises: ValueError: diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 673a6f362f..61cd6a47bf 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -5257,7 +5257,7 @@ def argmin(self): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def rename(self, index, **kwargs) -> Series | None: + def rename(self, index, *, inplace, **kwargs): """ Alter Series index labels or name. @@ -5301,15 +5301,17 @@ def rename(self, index, **kwargs) -> Series | None: the index. Scalar or hashable sequence-like will alter the ``Series.name`` attribute. + inplace (bool): + Default False. Whether to return a new Series. Returns: - bigframes.pandas.Series: - Series with index labels. + bigframes.pandas.Series | None: + Series with index labels or None if ``inplace=True``. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - def rename_axis(self, mapper, **kwargs): + def rename_axis(self, mapper, *, inplace, **kwargs): """ Set the name of the axis for the index or columns.