From 77d2c8857f2cac710c93d8c4d9e297e71e1c70b4 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Thu, 3 Oct 2024 19:34:28 +0000
Subject: [PATCH] perf: repr generates fewer queries

---
 bigframes/core/blocks.py             | 3 ++-
 bigframes/dataframe.py               | 1 -
 tests/system/small/test_dataframe.py | 7 ++++++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 9e245399cd..2b3734edd5 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -1557,10 +1557,11 @@ def retrieve_repr_request_results(
         Returns a tuple of the dataframe and the overall number of rows of the query.
         """
 
+        # head caches the full underlying expression, so get_row_count() is then free
         head_result = self.session._executor.head(self.expr, max_results)
         count = self.session._executor.get_row_count(self.expr)
 
-        arrow = self.session._executor.execute(self.expr).to_arrow_table()
+        arrow = head_result.to_arrow_table()
         df = io_pandas.arrow_to_pandas(arrow, schema=self.expr.schema)
         self._copy_index_to_pandas(df)
         return df, count, head_result.query_job
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 7fa584bcc0..efd0e65adb 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -690,7 +690,6 @@ def _repr_html_(self) -> str:
         if opts.repr_mode == "deferred":
             return formatter.repr_query_job(self._compute_dry_run())
 
-        self._cached()
         # TODO(swast): pass max_columns and get the true column count back. Maybe
         # get 1 more column than we have requested so that pandas can add the
         # ... for us?
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 8c2912edd4..cfd6efe9bd 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -591,15 +591,19 @@ def test_join_repr(scalars_dfs_maybe_ordered):
     assert actual == expected
 
 
-def test_repr_html_w_all_rows(scalars_dfs):
+def test_repr_html_w_all_rows(scalars_dfs, session):
+    metrics = session._metrics
     scalars_df, _ = scalars_dfs
     # get a pandas df of the expected format
     df, _ = scalars_df._block.to_pandas()
     pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1)
     pandas_df.index.name = scalars_df.index.name
 
+    executions_pre = metrics.execution_count
     # When there are 10 or fewer rows, the outputs should be identical except for the extra note.
     actual = scalars_df.head(10)._repr_html_()
+    executions_post = metrics.execution_count
+
     with display_options.pandas_repr(bigframes.options.display):
         pandas_repr = pandas_df.head(10)._repr_html_()
 
@@ -608,6 +612,7 @@ def test_repr_html_w_all_rows(scalars_dfs):
         + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]"
     )
     assert actual == expected
+    assert (executions_post - executions_pre) <= 2
 
 
 def test_df_column_name_with_space(scalars_dfs):