From 962e1fd356141a288bc31f3d0790640cc9b4ea61 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 21 Aug 2024 15:28:56 +0000
Subject: [PATCH 1/3] docs: add a code sample using
 `bpd.options.bigquery.ordering_mode = "partial"`

---
 .../quickstart_ordering_mode_partial_test.py  | 69 +++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 samples/snippets/quickstart_ordering_mode_partial_test.py

diff --git a/samples/snippets/quickstart_ordering_mode_partial_test.py b/samples/snippets/quickstart_ordering_mode_partial_test.py
new file mode 100644
index 0000000000..22ce9a333c
--- /dev/null
+++ b/samples/snippets/quickstart_ordering_mode_partial_test.py
@@ -0,0 +1,69 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import bigframes
+import bigframes.pandas
+
+
+def test_quickstart() -> None:
+    # We need a fresh session since we're modifying connection options.
+    bigframes.pandas.close_session()
+
+    # [START bigquery_bigframes_ordering_mode_partial]
+    import bigframes.pandas as bpd
+
+    bpd.options.bigquery.ordering_mode = "partial"
+    # [END bigquery_bigframes_ordering_mode_partial]
+
+    # Below is a copy of the main quickstart to check that it also works with
+    # this ordering mode.
+
+    # Create a DataFrame from a BigQuery table
+    query_or_table = "bigquery-public-data.ml_datasets.penguins"
+    df = bpd.read_gbq(query_or_table)
+
+    # Use the DataFrame just as you would a pandas DataFrame, but calculations
+    # happen in the BigQuery query engine instead of the local system.
+    average_body_mass = df["body_mass_g"].mean()
+    print(f"average_body_mass: {average_body_mass}")
+
+    # Create the Linear Regression model
+    from bigframes.ml.linear_model import LinearRegression
+
+    # Filter down to the data we want to analyze
+    adelie_data = df[df.species == "Adelie Penguin (Pygoscelis adeliae)"]
+
+    # Drop the columns we don't care about
+    adelie_data = adelie_data.drop(columns=["species"])
+
+    # Drop rows with nulls to get our training data
+    training_data = adelie_data.dropna()
+
+    # Pick feature columns and label column
+    X = training_data[
+        [
+            "island",
+            "culmen_length_mm",
+            "culmen_depth_mm",
+            "flipper_length_mm",
+            "sex",
+        ]
+    ]
+    y = training_data[["body_mass_g"]]
+
+    model = LinearRegression(fit_intercept=False)
+    model.fit(X, y)
+    model.score(X, y)
+
+    assert model is not None

From a4255de67da940b822b1f352cffa9e43c9e7d0d6 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 21 Aug 2024 15:35:35 +0000
Subject: [PATCH 2/3] docs: add sample filtering AmbiguousWindowWarning

---
 .../quickstart_ordering_mode_partial_test.py      | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/samples/snippets/quickstart_ordering_mode_partial_test.py b/samples/snippets/quickstart_ordering_mode_partial_test.py
index 22ce9a333c..c7917e4441 100644
--- a/samples/snippets/quickstart_ordering_mode_partial_test.py
+++ b/samples/snippets/quickstart_ordering_mode_partial_test.py
@@ -12,11 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import bigframes
-import bigframes.pandas
-
 
 def test_quickstart() -> None:
+    import bigframes.pandas
+
     # We need a fresh session since we're modifying connection options.
     bigframes.pandas.close_session()
 
@@ -26,6 +25,16 @@ def test_quickstart() -> None:
     bpd.options.bigquery.ordering_mode = "partial"
     # [END bigquery_bigframes_ordering_mode_partial]
 
+    # [START bigquery_bigframes_ordering_mode_partial_ambiguous_window_warning]
+    import warnings
+
+    import bigframes.exceptions
+
+    warnings.simplefilter(
+        "ignore", category=bigframes.exceptions.AmbiguousWindowWarning
+    )
+    # [END bigquery_bigframes_ordering_mode_partial_ambiguous_window_warning]
+
     # Below is a copy of the main quickstart to check that it also works with
     # this ordering mode.
 

From 6bbd36a6d455692925eaf82d4e753ef0604cedda Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 21 Aug 2024 18:28:03 +0000
Subject: [PATCH 3/3] docs: replace quickstart copy with a drop_duplicates
 alternative

---
 .../snippets/ordering_mode_partial_test.py    | 67 ++++++++++++++++
 .../quickstart_ordering_mode_partial_test.py  | 78 -------------------
 2 files changed, 67 insertions(+), 78 deletions(-)
 create mode 100644 samples/snippets/ordering_mode_partial_test.py
 delete mode 100644 samples/snippets/quickstart_ordering_mode_partial_test.py

diff --git a/samples/snippets/ordering_mode_partial_test.py b/samples/snippets/ordering_mode_partial_test.py
new file mode 100644
index 0000000000..15ee4cb4ba
--- /dev/null
+++ b/samples/snippets/ordering_mode_partial_test.py
@@ -0,0 +1,67 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_quickstart() -> None:
+    """Run the partial-ordering-mode documentation snippets as one test.
+
+    The snippets change process-wide state (the BigQuery DataFrames
+    ``ordering_mode`` option and the ``warnings`` filter list), so every change
+    is undone on exit, pass or fail, to keep it from leaking into other tests
+    that run in the same process.
+    """
+    import contextlib
+    import warnings
+
+    import bigframes.pandas
+
+    # We need a fresh session since we're modifying connection options.
+    bigframes.pandas.close_session()
+    original_ordering_mode = bigframes.pandas.options.bigquery.ordering_mode
+
+    def restore_ordering_mode() -> None:
+        """Close the snippets' session, then put the saved option value back."""
+        bigframes.pandas.close_session()
+        bigframes.pandas.options.bigquery.ordering_mode = original_ordering_mode
+
+    with contextlib.ExitStack() as cleanup:
+        # Callbacks run in reverse order on exit: the warning filters are
+        # restored first, then the ordering mode.
+        cleanup.callback(restore_ordering_mode)
+        cleanup.enter_context(warnings.catch_warnings())
+
+        # [START bigquery_bigframes_ordering_mode_partial]
+        import bigframes.pandas as bpd
+
+        bpd.options.bigquery.ordering_mode = "partial"
+        # [END bigquery_bigframes_ordering_mode_partial]
+
+        # [START bigquery_bigframes_ordering_mode_partial_ambiguous_window_warning]
+        import warnings
+
+        import bigframes.exceptions
+
+        warnings.simplefilter(
+            "ignore", category=bigframes.exceptions.AmbiguousWindowWarning
+        )
+        # [END bigquery_bigframes_ordering_mode_partial_ambiguous_window_warning]
+
+        df = bpd.DataFrame({"column": [1, 2, 1, 3, 1, 2, 3]})
+
+        # [START bigquery_bigframes_ordering_mode_partial_drop_duplicates]
+        # Avoid order dependency by using groupby instead of drop_duplicates.
+        unique_col = df.groupby(["column"], as_index=False).size().drop(columns="size")
+        # [END bigquery_bigframes_ordering_mode_partial_drop_duplicates]
+
+        assert len(unique_col) == 3
diff --git a/samples/snippets/quickstart_ordering_mode_partial_test.py b/samples/snippets/quickstart_ordering_mode_partial_test.py
deleted file mode 100644
index c7917e4441..0000000000
--- a/samples/snippets/quickstart_ordering_mode_partial_test.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# Copyright 2023 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-def test_quickstart() -> None:
-    import bigframes.pandas
-
-    # We need a fresh session since we're modifying connection options.
-    bigframes.pandas.close_session()
-
-    # [START bigquery_bigframes_ordering_mode_partial]
-    import bigframes.pandas as bpd
-
-    bpd.options.bigquery.ordering_mode = "partial"
-    # [END bigquery_bigframes_ordering_mode_partial]
-
-    # [START bigquery_bigframes_ordering_mode_partial_ambiguous_window_warning]
-    import warnings
-
-    import bigframes.exceptions
-
-    warnings.simplefilter(
-        "ignore", category=bigframes.exceptions.AmbiguousWindowWarning
-    )
-    # [END bigquery_bigframes_ordering_mode_partial_ambiguous_window_warning]
-
-    # Below is a copy of the main quickstart to check that it also works with
-    # this ordering mode.
-
-    # Create a DataFrame from a BigQuery table
-    query_or_table = "bigquery-public-data.ml_datasets.penguins"
-    df = bpd.read_gbq(query_or_table)
-
-    # Use the DataFrame just as you would a pandas DataFrame, but calculations
-    # happen in the BigQuery query engine instead of the local system.
-    average_body_mass = df["body_mass_g"].mean()
-    print(f"average_body_mass: {average_body_mass}")
-
-    # Create the Linear Regression model
-    from bigframes.ml.linear_model import LinearRegression
-
-    # Filter down to the data we want to analyze
-    adelie_data = df[df.species == "Adelie Penguin (Pygoscelis adeliae)"]
-
-    # Drop the columns we don't care about
-    adelie_data = adelie_data.drop(columns=["species"])
-
-    # Drop rows with nulls to get our training data
-    training_data = adelie_data.dropna()
-
-    # Pick feature columns and label column
-    X = training_data[
-        [
-            "island",
-            "culmen_length_mm",
-            "culmen_depth_mm",
-            "flipper_length_mm",
-            "sex",
-        ]
-    ]
-    y = training_data[["body_mass_g"]]
-
-    model = LinearRegression(fit_intercept=False)
-    model.fit(X, y)
-    model.score(X, y)
-
-    assert model is not None