From 710f2f72a62ed6e766c86e366f4a16adb1e29d58 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron
Date: Fri, 28 Jun 2024 22:42:44 +0000
Subject: [PATCH] feat: More informative error when query plan too complex

BigQuery reports an overly complex query plan only as a generic
BadRequest whose message contains "Resources exceeded during query
execution". Detect that case in Session._start_query and re-raise it as
bigframes.exceptions.QueryComplexityError, with guidance to cache
intermediate results or to enable multi-query execution.
---
 bigframes/exceptions.py              |  4 ++++
 bigframes/session/__init__.py        | 24 ++++++++++++++++--------
 tests/system/small/test_dataframe.py | 17 +++++++++++++++--
 3 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py
index bc0d83b4f6..1d31749760 100644
--- a/bigframes/exceptions.py
+++ b/bigframes/exceptions.py
@@ -51,5 +51,9 @@ class OrderRequiredError(ValueError):
     """Operation requires total row ordering to be enabled."""
 
 
+class QueryComplexityError(RuntimeError):
+    """Query plan is too complex to execute."""
+
+
 class TimeTravelDisabledWarning(Warning):
     """A query was reattempted without time travel."""
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 3aba3581aa..867bdedf1c 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1833,14 +1833,22 @@ def _start_query(
         Starts BigQuery query job and waits for results.
         """
         job_config = self._prepare_query_job_config(job_config)
-        return bigframes.session._io.bigquery.start_query_with_client(
-            self,
-            sql,
-            job_config,
-            max_results,
-            timeout,
-            api_name=api_name,
-        )
+        try:
+            return bigframes.session._io.bigquery.start_query_with_client(
+                self,
+                sql,
+                job_config,
+                max_results,
+                timeout,
+                api_name=api_name,
+            )
+        except google.api_core.exceptions.BadRequest as e:
+            # Unfortunately, this error has no dedicated error code or exception type, so we have to match on the message text.
+            if "Resources exceeded during query execution" in e.message:
+                new_message = "Computation is too complex to execute as a single query. Try using DataFrame.cache() on intermediate results, or setting bigframes.options.compute.enable_multi_query_execution."
+                raise bigframes.exceptions.QueryComplexityError(new_message) from e
+            else:
+                raise
 
     def _start_query_ml_ddl(
         self,
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index a5c810b91b..625b920763 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -4472,13 +4472,26 @@ def test_recursion_limit(scalars_df_index):
     scalars_df_index.to_pandas()
 
 
+def test_query_complexity_error(scalars_df_index):
+    # This test requires automatic caching/query decomposition to be turned off
+    bf_df = scalars_df_index
+    for _ in range(8):
+        bf_df = bf_df.merge(bf_df, on="int64_col").head(30)
+        bf_df = bf_df[bf_df.columns[:20]]
+
+    with pytest.raises(
+        bigframes.exceptions.QueryComplexityError, match=r"Try using DataFrame\.cache"
+    ):
+        bf_df.to_pandas()
+
+
 def test_query_complexity_repeated_joins(
     scalars_df_index, scalars_pandas_df_index, with_multiquery_execution
 ):
     pd_df = scalars_pandas_df_index
     bf_df = scalars_df_index
-    for _ in range(6):
-        # recursively join, resuling in 2^6 - 1 = 63 joins
+    for _ in range(8):
+        # recursively join, resulting in 2^8 - 1 = 255 joins
         pd_df = pd_df.merge(pd_df, on="int64_col").head(30)
         pd_df = pd_df[pd_df.columns[:20]]
         bf_df = bf_df.merge(bf_df, on="int64_col").head(30)
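
Note: the following is a minimal sketch, not part of the patch, showing
how the new error is intended to surface and be handled. It assumes a
configured BigQuery session; the table name my-project.my_dataset.events
and the join key "id" are hypothetical, while DataFrame.cache() and
bigframes.options.compute.enable_multi_query_execution are the two
mitigations named in the new error message.

    import bigframes
    import bigframes.pandas as bpd
    from bigframes.exceptions import QueryComplexityError

    # Hypothetical table; any table with a self-joinable key column works.
    df = bpd.read_gbq("my-project.my_dataset.events")

    for _ in range(8):
        # Each pass roughly doubles the join tree, so the single-query
        # plan eventually exceeds BigQuery's complexity limit.
        df = df.merge(df, on="id").head(30)
        df = df[df.columns[:20]]  # keep the column count bounded
        # Mitigation 1: df = df.cache() here would materialize each
        # intermediate result, keeping any single query small.

    try:
        result = df.to_pandas()
    except QueryComplexityError:
        # Mitigation 2: let bigframes decompose the plan into several
        # smaller queries, then retry the deferred computation.
        bigframes.options.compute.enable_multi_query_execution = True
        result = df.to_pandas()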