From d907ecab62b5aaa120bdeea517d741eb2c1ba277 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 9 Nov 2023 09:26:21 -0600
Subject: [PATCH] docs: use head() to get top results, not to preview results

head() requires ordering. Just peeking at the whole DataFrame or Series is
actually more efficient since it doesn't require ordering and still only
downloads a fraction of the results.
---
 samples/snippets/pandas_methods_test.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/samples/snippets/pandas_methods_test.py b/samples/snippets/pandas_methods_test.py
index 1f472d6346..bd8e29c003 100644
--- a/samples/snippets/pandas_methods_test.py
+++ b/samples/snippets/pandas_methods_test.py
@@ -22,13 +22,20 @@ def test_bigquery_dataframes_pandas_methods():
     bq_df = bpd.read_gbq(query_or_table)
 
     # Inspect one of the columns (or series) of the DataFrame:
-    bq_df["body_mass_g"].head(10)
+    bq_df["body_mass_g"]
 
     # Compute the mean of this series:
     average_body_mass = bq_df["body_mass_g"].mean()
     print(f"average_body_mass: {average_body_mass}")
 
-    # Calculate the mean body_mass_g by species using the groupby operation:
-    bq_df["body_mass_g"].groupby(by=bq_df["species"]).mean().head()
+    # Find the heaviest species using the groupby operation to calculate the
+    # mean body_mass_g:
+    (
+        bq_df["body_mass_g"]
+        .groupby(by=bq_df["species"])
+        .mean()
+        .sort_values(ascending=False)
+        .head(10)
+    )
     # [END bigquery_dataframes_pandas_methods]
     assert average_body_mass is not None