googleapis · tswast · Mar 19, 2024 · Mar 19, 2024
@@ -1086,19 +1086,19 @@ def head(self, n: int = 5) -> DataFrame:
    def tail(self, n: int = 5) -> DataFrame:
        return typing.cast(DataFrame, self.iloc[-n:])

-    def peek(self, n: int = 5, *, force: bool = False) -> pandas.DataFrame:
+    def peek(self, n: int = 5, *, force: bool = True) -> pandas.DataFrame:
        """
        Preview n arbitrary rows from the dataframe. No guarantees about row selection or ordering.
-        DataFrame.peek(force=False) will always be very fast, but will not succeed if data requires
-        full data scanning. Using force=True will always succeed, but may be perform expensive
-        computations.
+        ``DataFrame.peek(force=False)`` will always be very fast, but will not succeed if data requires
+        full data scanning. Using ``force=True`` will always succeed, but may perform queries.
+        Query results will be cached so that future steps will benefit from these queries.

        Args:
            n (int, default 5):
                The number of rows to select from the dataframe. Which N rows are returned is non-deterministic.
-            force (bool, default False):
+            force (bool, default True):
                If the data cannot be peeked efficiently, the dataframe will instead be fully materialized as part
-                of the operation if force=True. If force=False, the operation will throw a ValueError.
+                of the operation if ``force=True``. If ``force=False``, the operation will raise a ValueError.
        Returns:
            pandas.DataFrame: A pandas DataFrame with n rows.


@@ -429,14 +429,14 @@ def test_rename(scalars_dfs):

 def test_df_peek(scalars_dfs):
    scalars_df, scalars_pandas_df = scalars_dfs
-    peek_result = scalars_df.peek(n=3)
+    peek_result = scalars_df.peek(n=3, force=False)
    pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
    assert len(peek_result) == 3


 def test_df_peek_filtered(scalars_dfs):
    scalars_df, scalars_pandas_df = scalars_dfs
-    peek_result = scalars_df[scalars_df.int64_col != 0].peek(n=3)
+    peek_result = scalars_df[scalars_df.int64_col != 0].peek(n=3, force=False)
    pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
    assert len(peek_result) == 3

@@ -449,9 +449,9 @@ def test_df_peek_exception(scalars_dfs):
        scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3, force=False)


-def test_df_peek_force(scalars_dfs):
+def test_df_peek_force_default(scalars_dfs):
    scalars_df, scalars_pandas_df = scalars_dfs
-    peek_result = scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3, force=True)
+    peek_result = scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3)
    pd.testing.assert_index_equal(
        scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns
    )