From c57d2380dd1372d84980dc914d997fc96d68401b Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 20 Mar 2024 04:15:06 +0000
Subject: [PATCH 1/3] fix: sampling plot cannot preserve ordering if index is
 not ordered

---
 bigframes/core/blocks.py                       | 17 ++++++++++++++---
 bigframes/dataframe.py                         |  5 ++++-
 bigframes/operations/_matplotlib/core.py       |  8 +++-----
 bigframes/series.py                            |  5 ++++-
 tests/system/small/operations/test_plotting.py | 15 ++++++++++++++-
 tests/system/small/test_dataframe.py           | 18 ++++++++++++++++++
 .../bigframes_vendored/pandas/core/generic.py  |  7 +++++++
 7 files changed, 64 insertions(+), 11 deletions(-)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 0ebbe48cc4..27538c712a 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -563,7 +563,7 @@ def _downsample(
             block = self._split(
                 fracs=(fraction,),
                 random_state=random_state,
-                preserve_order=True,
+                sort=None,
             )[0]
             return block
         else:
@@ -579,7 +579,7 @@ def _split(
         fracs: Iterable[float] = (),
         *,
         random_state: Optional[int] = None,
-        preserve_order: Optional[bool] = False,
+        sort: Optional[bool] = False,
     ) -> List[Block]:
         """Internal function to support splitting Block to multiple parts along index axis.
 
@@ -631,7 +631,18 @@ def _split(
             typing.cast(Block, block.slice(start=lower, stop=upper))
             for lower, upper in intervals
         ]
-        if preserve_order:
+
+        if sort is True:
+            sliced_blocks = [
+                sliced_block.order_by(
+                    [
+                        ordering.OrderingColumnReference(idx_col)
+                        for idx_col in sliced_block.index_columns
+                    ]
+                )
+                for sliced_block in sliced_blocks
+            ]
+        elif sort is None:
             sliced_blocks = [
                 sliced_block.order_by([ordering.OrderingColumnReference(ordering_col)])
                 for sliced_block in sliced_blocks
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 0f99a3e4db..1264e5fa73 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2497,6 +2497,7 @@ def sample(
         frac: Optional[float] = None,
         *,
         random_state: Optional[int] = None,
+        sort: Optional[bool] = False,
     ) -> DataFrame:
         if n is not None and frac is not None:
             raise ValueError("Only one of 'n' or 'frac' parameter can be specified.")
@@ -2504,7 +2505,9 @@ def sample(
         ns = (n,) if n is not None else ()
         fracs = (frac,) if frac is not None else ()
         return DataFrame(
-            self._block._split(ns=ns, fracs=fracs, random_state=random_state)[0]
+            self._block._split(
+                ns=ns, fracs=fracs, random_state=random_state, sort=sort
+            )[0]
         )
 
     def _split(
diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py
index 5c9d771f61..8c5c0e1f2e 100644
--- a/bigframes/operations/_matplotlib/core.py
+++ b/bigframes/operations/_matplotlib/core.py
@@ -47,11 +47,9 @@ def _compute_plot_data(self, data):
         # TODO: Cache the sampling data in the PlotAccessor.
         sampling_n = self.kwargs.pop("sampling_n", 100)
         sampling_random_state = self.kwargs.pop("sampling_random_state", 0)
-        return (
-            data.sample(n=sampling_n, random_state=sampling_random_state)
-            .to_pandas()
-            .sort_index()
-        )
+        return data.sample(
+            n=sampling_n, random_state=sampling_random_state, sort=None
+        ).to_pandas()
 
 
 class LinePlot(SamplingPlot):
diff --git a/bigframes/series.py b/bigframes/series.py
index 6128238057..7d8147bb11 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -1529,6 +1529,7 @@ def sample(
         frac: Optional[float] = None,
         *,
         random_state: Optional[int] = None,
+        sort: Optional[bool] = False,
     ) -> Series:
         if n is not None and frac is not None:
             raise ValueError("Only one of 'n' or 'frac' parameter can be specified.")
@@ -1536,7 +1537,9 @@ def sample(
         ns = (n,) if n is not None else ()
         fracs = (frac,) if frac is not None else ()
         return Series(
-            self._block._split(ns=ns, fracs=fracs, random_state=random_state)[0]
+            self._block._split(
+                ns=ns, fracs=fracs, random_state=random_state, sort=sort
+            )[0]
         )
 
     def __array_ufunc__(
diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py
index 876c8f7d04..47491cdada 100644
--- a/tests/system/small/operations/test_plotting.py
+++ b/tests/system/small/operations/test_plotting.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import numpy as np
+import pandas as pd
 import pandas._testing as tm
 import pytest
 
@@ -235,6 +236,18 @@ def test_sampling_plot_args_random_state():
         tm.assert_almost_equal(ax_0.lines[0].get_data()[1], ax_2.lines[0].get_data()[1])
 
 
+def test_sampling_preserve_ordering():
+    df = bpd.DataFrame([0.0, 1.0, 2.0, 3.0, 4.0], index=[1, 3, 4, 2, 0])
+    pd_df = pd.DataFrame([0.0, 1.0, 2.0, 3.0, 4.0], index=[1, 3, 4, 2, 0])
+    ax = df.plot.line()
+    pd_ax = pd_df.plot.line()
+    tm.assert_almost_equal(ax.get_xticks(), pd_ax.get_xticks())
+    tm.assert_almost_equal(ax.get_yticks(), pd_ax.get_yticks())
+    for line, pd_line in zip(ax.lines, pd_ax.lines):
+        # Compare y coordinates between the lines
+        tm.assert_almost_equal(line.get_data()[1], pd_line.get_data()[1])
+
+
 @pytest.mark.parametrize(
     ("kind", "col_names", "kwargs"),
     [
@@ -251,7 +264,7 @@ def test_sampling_plot_args_random_state():
             marks=pytest.mark.xfail(raises=ValueError),
         ),
         pytest.param(
-            "uknown",
+            "bar",
             ["int64_col", "int64_too"],
             {},
             marks=pytest.mark.xfail(raises=NotImplementedError),
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index ee32fb25ac..73b3dbdf02 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -3011,6 +3011,24 @@ def test_sample_raises_value_error(scalars_dfs):
         scalars_df.sample(frac=0.5, n=4)
 
 
+def test_sample_args_sort(scalars_dfs):
+    scalars_df, _ = scalars_dfs
+    index = [4, 3, 2, 5, 1, 0]
+    scalars_df = scalars_df.iloc[index]
+    
+    kwargs = {"frac": 1.0, "random_state": 333}
+
+    df = scalars_df.sample(**kwargs).to_pandas()
+    assert df.index.values != index
+    assert df.index.values != sorted(index)
+
+    df = scalars_df.sample(sort=True, **kwargs).to_pandas()
+    assert df.index.values == sorted(index)
+
+    df = scalars_df.sample(sort=None, **kwargs).to_pandas()
+    assert df.index.values == index
+
+
 @pytest.mark.parametrize(
     ("axis",),
     [
diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py
index 9358dca17b..fc8a06d412 100644
--- a/third_party/bigframes_vendored/pandas/core/generic.py
+++ b/third_party/bigframes_vendored/pandas/core/generic.py
@@ -472,6 +472,7 @@ def sample(
         frac: Optional[float] = None,
         *,
         random_state: Optional[int] = None,
+        sort: Optional[bool] = None,
     ):
         """Return a random sample of items from an axis of object.
 
@@ -530,6 +531,12 @@ def sample(
                 Fraction of axis items to return. Cannot be used with `n`.
             random_state (Optional[int], default None):
                 Seed for random number generator.
+            sort (Optional[bool], default False):
+
+                - 'False' (default): No specific ordering will be applied after
+                  sampling.
+                - 'True' : Index columns will determine the sample's order.
+                - None: The sample will retain the original object's order.
 
         Returns:
             A new object of same type as caller containing `n` items randomly

From 24940a1e26b4bd7948e162047c5be5b0e9b9f62c Mon Sep 17 00:00:00 2001
From: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Date: Wed, 20 Mar 2024 04:20:35 +0000
Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?=
 =?UTF-8?q?st-processor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
---
 tests/system/small/test_dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 73b3dbdf02..7847729620 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -3015,7 +3015,7 @@ def test_sample_args_sort(scalars_dfs):
     scalars_df, _ = scalars_dfs
     index = [4, 3, 2, 5, 1, 0]
     scalars_df = scalars_df.iloc[index]
-    
+
     kwargs = {"frac": 1.0, "random_state": 333}
 
     df = scalars_df.sample(**kwargs).to_pandas()

From db507c329bc022d7ea4418395ac79598d51ebc13 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 20 Mar 2024 22:21:14 +0000
Subject: [PATCH 3/3] change sort type

---
 bigframes/core/blocks.py                              | 8 ++++----
 bigframes/dataframe.py                                | 2 +-
 bigframes/operations/_matplotlib/core.py              | 4 +++-
 bigframes/series.py                                   | 4 ++--
 tests/system/small/test_dataframe.py                  | 6 +++++-
 third_party/bigframes_vendored/pandas/core/generic.py | 8 ++++----
 6 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 27538c712a..64c305ba0f 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -26,7 +26,7 @@
 import itertools
 import random
 import typing
-from typing import Iterable, List, Mapping, Optional, Sequence, Tuple
+from typing import Iterable, List, Literal, Mapping, Optional, Sequence, Tuple
 import warnings
 
 import bigframes_vendored.pandas.io.common as vendored_pandas_io_common
@@ -563,7 +563,7 @@ def _downsample(
             block = self._split(
                 fracs=(fraction,),
                 random_state=random_state,
-                sort=None,
+                sort=False,
             )[0]
             return block
         else:
@@ -579,7 +579,7 @@ def _split(
         fracs: Iterable[float] = (),
         *,
         random_state: Optional[int] = None,
-        sort: Optional[bool] = False,
+        sort: Optional[bool | Literal["random"]] = "random",
     ) -> List[Block]:
         """Internal function to support splitting Block to multiple parts along index axis.
 
@@ -642,7 +642,7 @@ def _split(
                 )
                 for sliced_block in sliced_blocks
             ]
-        elif sort is None:
+        elif sort is False:
             sliced_blocks = [
                 sliced_block.order_by([ordering.OrderingColumnReference(ordering_col)])
                 for sliced_block in sliced_blocks
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 1264e5fa73..9f970befa4 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2497,7 +2497,7 @@ def sample(
         frac: Optional[float] = None,
         *,
         random_state: Optional[int] = None,
-        sort: Optional[bool] = False,
+        sort: Optional[bool | Literal["random"]] = "random",
     ) -> DataFrame:
         if n is not None and frac is not None:
             raise ValueError("Only one of 'n' or 'frac' parameter can be specified.")
diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py
index 8c5c0e1f2e..7cbeb3df4f 100644
--- a/bigframes/operations/_matplotlib/core.py
+++ b/bigframes/operations/_matplotlib/core.py
@@ -48,7 +48,9 @@ def _compute_plot_data(self, data):
         sampling_n = self.kwargs.pop("sampling_n", 100)
         sampling_random_state = self.kwargs.pop("sampling_random_state", 0)
         return data.sample(
-            n=sampling_n, random_state=sampling_random_state, sort=None
+            n=sampling_n,
+            random_state=sampling_random_state,
+            sort=False,
         ).to_pandas()
 
 
diff --git a/bigframes/series.py b/bigframes/series.py
index 7d8147bb11..311861bd15 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -21,7 +21,7 @@
 import numbers
 import textwrap
 import typing
-from typing import Any, Mapping, Optional, Tuple, Union
+from typing import Any, Literal, Mapping, Optional, Tuple, Union
 
 import bigframes_vendored.pandas.core.series as vendored_pandas_series
 import google.cloud.bigquery as bigquery
@@ -1529,7 +1529,7 @@ def sample(
         frac: Optional[float] = None,
         *,
         random_state: Optional[int] = None,
-        sort: Optional[bool] = False,
+        sort: Optional[bool | Literal["random"]] = "random",
     ) -> Series:
         if n is not None and frac is not None:
             raise ValueError("Only one of 'n' or 'frac' parameter can be specified.")
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 7847729620..9bf4683739 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -3022,10 +3022,14 @@ def test_sample_args_sort(scalars_dfs):
     assert df.index.values != index
     assert df.index.values != sorted(index)
 
+    df = scalars_df.sample(sort="random", **kwargs).to_pandas()
+    assert df.index.values != index
+    assert df.index.values != sorted(index)
+
     df = scalars_df.sample(sort=True, **kwargs).to_pandas()
     assert df.index.values == sorted(index)
 
-    df = scalars_df.sample(sort=None, **kwargs).to_pandas()
+    df = scalars_df.sample(sort=False, **kwargs).to_pandas()
     assert df.index.values == index
 
 
diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py
index fc8a06d412..1391f64251 100644
--- a/third_party/bigframes_vendored/pandas/core/generic.py
+++ b/third_party/bigframes_vendored/pandas/core/generic.py
@@ -472,7 +472,7 @@ def sample(
         frac: Optional[float] = None,
         *,
         random_state: Optional[int] = None,
-        sort: Optional[bool] = None,
+        sort: Optional[bool | Literal["random"]] = "random",
     ):
         """Return a random sample of items from an axis of object.
 
@@ -531,12 +531,12 @@ def sample(
                 Fraction of axis items to return. Cannot be used with `n`.
             random_state (Optional[int], default None):
                 Seed for random number generator.
-            sort (Optional[bool], default False):
+            sort (Optional[bool|Literal["random"]], default "random"):
 
-                - 'False' (default): No specific ordering will be applied after
+                - 'random' (default): No specific ordering will be applied after
                   sampling.
                 - 'True' : Index columns will determine the sample's order.
-                - None: The sample will retain the original object's order.
+                - 'False': The sample will retain the original object's order.
 
         Returns:
             A new object of same type as caller containing `n` items randomly