googleapis · GarrettWu · Oct 3, 2024 · Oct 3, 2024
@@ -1557,10 +1557,11 @@ def retrieve_repr_request_results(
        Returns a tuple of the dataframe and the overall number of rows of the query.
        """

+        # head caches the full underlying expression, so the row count that follows is free
        head_result = self.session._executor.head(self.expr, max_results)
        count = self.session._executor.get_row_count(self.expr)

-        arrow = self.session._executor.execute(self.expr).to_arrow_table()
+        arrow = head_result.to_arrow_table()
        df = io_pandas.arrow_to_pandas(arrow, schema=self.expr.schema)
        self._copy_index_to_pandas(df)
        return df, count, head_result.query_job

@@ -690,7 +690,6 @@ def _repr_html_(self) -> str:
        if opts.repr_mode == "deferred":
            return formatter.repr_query_job(self._compute_dry_run())

-        self._cached()
        # TODO(swast): pass max_columns and get the true column count back. Maybe
        # get 1 more column than we have requested so that pandas can add the
        # ... for us?

@@ -591,15 +591,19 @@ def test_join_repr(scalars_dfs_maybe_ordered):
    assert actual == expected


-def test_repr_html_w_all_rows(scalars_dfs):
+def test_repr_html_w_all_rows(scalars_dfs, session):
+    metrics = session._metrics
    scalars_df, _ = scalars_dfs
    # get a pandas df of the expected format
    df, _ = scalars_df._block.to_pandas()
    pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1)
    pandas_df.index.name = scalars_df.index.name

+    executions_pre = metrics.execution_count
    # When there are 10 or fewer rows, the outputs should be identical except for the extra note.
    actual = scalars_df.head(10)._repr_html_()
+    executions_post = metrics.execution_count
+
    with display_options.pandas_repr(bigframes.options.display):
        pandas_repr = pandas_df.head(10)._repr_html_()

@@ -608,6 +612,7 @@ def test_repr_html_w_all_rows(scalars_dfs):
        + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]"
    )
    assert actual == expected
+    assert (executions_post - executions_pre) <= 2


 def test_df_column_name_with_space(scalars_dfs):