From 2926e09280086c6fa270cc00609102b508c4c518 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 11 Jun 2024 18:33:55 +0000 Subject: [PATCH] fix: allow `__repr__` to work with uninitialized DataFrame/Series/Index --- bigframes/core/indexes/base.py | 5 +++++ bigframes/dataframe.py | 19 +++++++++++++--- bigframes/series.py | 5 +++++ tests/unit/core/test_indexes.py | 39 +++++++++++++++++++++++++++++++++ tests/unit/test_dataframe.py | 14 ++++++++++++ tests/unit/test_series.py | 27 +++++++++++++++++++++++ 6 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 tests/unit/core/test_indexes.py create mode 100644 tests/unit/test_series.py diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 0e5082447a..8df6155591 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -243,6 +243,11 @@ def query_job(self) -> Optional[bigquery.QueryJob]: return self._query_job def __repr__(self) -> str: + # Protect against errors with uninitialized Index. See: + # https://github.com/googleapis/python-bigquery-dataframes/issues/728 + if not hasattr(self, "_block"): + return object.__repr__(self) + # TODO(swast): Add a timeout here? If the query is taking a long time, # maybe we just print the job metadata that we have so far? # TODO(swast): Avoid downloading the whole series by using job diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index f12c346776..a8e1e9dc45 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -574,9 +574,18 @@ def _getitem_bool_series(self, key: bigframes.series.Series) -> DataFrame: return DataFrame(block) def __getattr__(self, key: str): + # Protect against recursion errors with uninitialized DataFrame + # objects.
See: + # https://github.com/googleapis/python-bigquery-dataframes/issues/728 + # and + # https://nedbatchelder.com/blog/201010/surprising_getattr_recursion.html + if key == "_block": + raise AttributeError("_block") + if key in self._block.column_labels: return self.__getitem__(key) - elif hasattr(pandas.DataFrame, key): + + if hasattr(pandas.DataFrame, key): raise AttributeError( textwrap.dedent( f""" @@ -585,8 +594,7 @@ def __getattr__(self, key: str): """ ) ) - else: - raise AttributeError(key) + raise AttributeError(key) def __setattr__(self, key: str, value): if key in ["_block", "_query_job"]: @@ -616,6 +624,11 @@ def __repr__(self) -> str: Only represents the first `bigframes.options.display.max_rows`. """ + # Protect against errors with uninitialized DataFrame. See: + # https://github.com/googleapis/python-bigquery-dataframes/issues/728 + if not hasattr(self, "_block"): + return object.__repr__(self) + opts = bigframes.options.display max_results = opts.max_rows if opts.repr_mode == "deferred": diff --git a/bigframes/series.py b/bigframes/series.py index d858060aec..cb56319471 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -281,6 +281,11 @@ def reset_index( return bigframes.dataframe.DataFrame(block) def __repr__(self) -> str: + # Protect against errors with uninitialized Series. See: + # https://github.com/googleapis/python-bigquery-dataframes/issues/728 + if not hasattr(self, "_block"): + return object.__repr__(self) + # TODO(swast): Add a timeout here? If the query is taking a long time, # maybe we just print the job metadata that we have so far? 
# TODO(swast): Avoid downloading the whole series by using job diff --git a/tests/unit/core/test_indexes.py b/tests/unit/core/test_indexes.py new file mode 100644 index 0000000000..6e739c9dc9 --- /dev/null +++ b/tests/unit/core/test_indexes.py @@ -0,0 +1,39 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import bigframes.core.indexes + + +def test_index_repr_with_uninitialized_object(): + """Ensures Index.__init__ can be paused in a visual debugger without crashing. + + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/728 + """ + # Avoid calling __init__ to simulate pausing __init__ in a debugger. + # https://stackoverflow.com/a/6384982/101923 + index = object.__new__(bigframes.core.indexes.Index) + got = repr(index) + assert "Index" in got + + +def test_multiindex_repr_with_uninitialized_object(): + """Ensures MultiIndex.__init__ can be paused in a visual debugger without crashing. + + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/728 + """ + # Avoid calling __init__ to simulate pausing __init__ in a debugger. 
+ # https://stackoverflow.com/a/6384982/101923 + index = object.__new__(bigframes.core.indexes.MultiIndex) + got = repr(index) + assert "MultiIndex" in got diff --git a/tests/unit/test_dataframe.py b/tests/unit/test_dataframe.py index 17a8290889..6370d1b987 100644 --- a/tests/unit/test_dataframe.py +++ b/tests/unit/test_dataframe.py @@ -15,9 +15,23 @@ import google.cloud.bigquery import pytest +import bigframes.dataframe + from . import resources +def test_dataframe_repr_with_uninitialized_object(): + """Ensures DataFrame.__init__ can be paused in a visual debugger without crashing. + + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/728 + """ + # Avoid calling __init__ to simulate pausing __init__ in a debugger. + # https://stackoverflow.com/a/6384982/101923 + dataframe = bigframes.dataframe.DataFrame.__new__(bigframes.dataframe.DataFrame) + got = repr(dataframe) + assert "DataFrame" in got + + def test_dataframe_to_gbq_invalid_destination(monkeypatch: pytest.MonkeyPatch): dataframe = resources.create_dataframe(monkeypatch) diff --git a/tests/unit/test_series.py b/tests/unit/test_series.py new file mode 100644 index 0000000000..1409209c6c --- /dev/null +++ b/tests/unit/test_series.py @@ -0,0 +1,27 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import bigframes.series + + +def test_series_repr_with_uninitialized_object(): + """Ensures Series.__init__ can be paused in a visual debugger without crashing. + + Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/728 + """ + # Avoid calling __init__ to simulate pausing __init__ in a debugger. + # https://stackoverflow.com/a/6384982/101923 + series = bigframes.series.Series.__new__(bigframes.series.Series) + got = repr(series) + assert "Series" in got