Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit bbdeb70

Browse filesBrowse files
feat(bigframes): Support unstable sort_values, sort_index (#16665)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/google-cloud-python/issues) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> 🦕 --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 63b88fb commit bbdeb70
Copy full SHA for bbdeb70

11 files changed

+111-50Lines changed: 111 additions & 50 deletions

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎packages/bigframes/bigframes/core/array_value.py‎

Copy file name to clipboardExpand all lines: packages/bigframes/bigframes/core/array_value.py
+8-2Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,11 +212,17 @@ def filter(self, predicate: ex.Expression):
212212
return arr.drop_columns(filter_ids)
213213

214214
def order_by(
215-
self, by: Sequence[OrderingExpression], is_total_order: bool = False
215+
self,
216+
by: Sequence[OrderingExpression],
217+
is_total_order: bool = False,
218+
stable: bool = True,
216219
) -> ArrayValue:
217220
return ArrayValue(
218221
nodes.OrderByNode(
219-
child=self.node, by=tuple(by), is_total_order=is_total_order
222+
child=self.node,
223+
by=tuple(by),
224+
is_total_order=is_total_order,
225+
stable=stable,
220226
)
221227
)
222228

Collapse file

‎packages/bigframes/bigframes/core/blocks.py‎

Copy file name to clipboardExpand all lines: packages/bigframes/bigframes/core/blocks.py
+14-17Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -395,9 +395,10 @@ def cols_matching_label(self, partial_label: Label) -> typing.Sequence[str]:
395395
def order_by(
396396
self,
397397
by: typing.Sequence[ordering.OrderingExpression],
398+
stable: bool = True,
398399
) -> Block:
399400
return Block(
400-
self._expr.order_by(by),
401+
self._expr.order_by(by, stable=stable),
401402
index_columns=self.index_columns,
402403
column_labels=self.column_labels,
403404
index_labels=self.index.names,
@@ -2412,13 +2413,13 @@ def _align_both_axes(
24122413
rcol_indexer if (rcol_indexer is not None) else range(len(columns))
24132414
)
24142415

2415-
left_input_lookup = (
2416-
lambda index: ex.deref(get_column_left[self.value_columns[index]])
2416+
left_input_lookup = lambda index: (
2417+
ex.deref(get_column_left[self.value_columns[index]])
24172418
if index != -1
24182419
else ex.const(None)
24192420
)
2420-
righ_input_lookup = (
2421-
lambda index: ex.deref(get_column_right[other.value_columns[index]])
2421+
righ_input_lookup = lambda index: (
2422+
ex.deref(get_column_right[other.value_columns[index]])
24222423
if index != -1
24232424
else ex.const(None)
24242425
)
@@ -2471,15 +2472,13 @@ def _align_series_block_axis_1(
24712472
rcol_indexer if (rcol_indexer is not None) else range(len(columns))
24722473
)
24732474

2474-
left_input_lookup = (
2475-
lambda index: ex.deref(get_column_left[self.value_columns[index]])
2475+
left_input_lookup = lambda index: (
2476+
ex.deref(get_column_left[self.value_columns[index]])
24762477
if index != -1
24772478
else ex.const(None)
24782479
)
2479-
righ_input_lookup = (
2480-
lambda index: ex.deref(
2481-
get_column_right[other.transpose().value_columns[index]]
2482-
)
2480+
righ_input_lookup = lambda index: (
2481+
ex.deref(get_column_right[other.transpose().value_columns[index]])
24832482
if index != -1
24842483
else ex.const(None)
24852484
)
@@ -2506,13 +2505,11 @@ def _align_pd_series_axis_1(
25062505
rcol_indexer if (rcol_indexer is not None) else range(len(columns))
25072506
)
25082507

2509-
left_input_lookup = (
2510-
lambda index: ex.deref(self.value_columns[index])
2511-
if index != -1
2512-
else ex.const(None)
2508+
left_input_lookup = lambda index: (
2509+
ex.deref(self.value_columns[index]) if index != -1 else ex.const(None)
25132510
)
2514-
righ_input_lookup = (
2515-
lambda index: ex.const(other.iloc[index]) if index != -1 else ex.const(None)
2511+
righ_input_lookup = lambda index: (
2512+
ex.const(other.iloc[index]) if index != -1 else ex.const(None)
25162513
)
25172514

25182515
left_inputs = [left_input_lookup(i) for i in lcol_indexer]
Collapse file

‎packages/bigframes/bigframes/core/indexes/base.py‎

Copy file name to clipboardExpand all lines: packages/bigframes/bigframes/core/indexes/base.py
+11-8Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -255,12 +255,6 @@ def query_job(self) -> bigquery.QueryJob:
255255
self._query_job = query_job
256256
return self._query_job
257257

258-
@property
259-
def str(self) -> bigframes.operations.strings.StringMethods:
260-
import bigframes.operations.strings
261-
262-
return bigframes.operations.strings.StringMethods(self)
263-
264258
def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:
265259
"""Get integer location, slice or boolean mask for requested label.
266260
@@ -436,7 +430,8 @@ def sort_values(
436430
*,
437431
inplace: bool = False,
438432
ascending: bool = True,
439-
na_position: __builtins__.str = "last",
433+
kind: str | None = None,
434+
na_position: str = "last",
440435
) -> Index:
441436
if na_position not in ["first", "last"]:
442437
raise ValueError("Param na_position must be one of 'first' or 'last'")
@@ -448,7 +443,8 @@ def sort_values(
448443
else order.descending_over(column, na_last)
449444
for column in index_columns
450445
]
451-
return Index(self._block.order_by(ordering))
446+
is_stable = (kind or constants.DEFAULT_SORT_KIND) in constants.STABLE_SORT_KINDS
447+
return Index(self._block.order_by(ordering, stable=is_stable))
452448

453449
def astype(
454450
self,
@@ -840,6 +836,13 @@ def _apply_binary_op(
840836
else:
841837
return NotImplemented
842838

839+
# last so as to not shadow __builtins__.str
840+
@property
841+
def str(self) -> bigframes.operations.strings.StringMethods:
842+
import bigframes.operations.strings
843+
844+
return bigframes.operations.strings.StringMethods(self)
845+
843846

844847
def _should_create_datetime_index(block: blocks.Block) -> bool:
845848
if len(block.index.dtypes) != 1:
Collapse file

‎packages/bigframes/bigframes/core/nodes.py‎

Copy file name to clipboardExpand all lines: packages/bigframes/bigframes/core/nodes.py
+2-1Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,8 @@ def remap_refs(
991991
@dataclasses.dataclass(frozen=True, eq=False)
992992
class OrderByNode(UnaryNode):
993993
by: Tuple[OrderingExpression, ...]
994-
# This is an optimization, if true, can discard previous orderings.
994+
stable: bool = True
995+
# This is an optimization, if true, can discard previous orderings, even if doing a stable sort
995996
# might be a total ordering even if false
996997
is_total_order: bool = False
997998

Collapse file

‎packages/bigframes/bigframes/core/rewrite/order.py‎

Copy file name to clipboardExpand all lines: packages/bigframes/bigframes/core/rewrite/order.py
+6-1Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ def pull_up_order_inner(
7171
child_result, child_order = pull_up_order_inner(node.child)
7272
return child_result, child_order.with_reverse()
7373
elif isinstance(node, bigframes.core.nodes.OrderByNode):
74-
if node.is_total_order:
74+
# unstable sorts don't care about previous order, total orders override previous order
75+
if (not node.stable) or node.is_total_order:
7576
new_node = remove_order(node.child)
7677
else:
7778
new_node, child_order = pull_up_order_inner(node.child)
@@ -106,6 +107,10 @@ def pull_up_order_inner(
106107
),
107108
)
108109
)
110+
elif not node.stable:
111+
new_order = bigframes.core.ordering.RowOrdering(
112+
ordering_value_columns=tuple(new_by),
113+
)
109114
else:
110115
assert child_order
111116
new_order = child_order.with_ordering_columns(new_by)
Collapse file

‎packages/bigframes/bigframes/dataframe.py‎

Copy file name to clipboardExpand all lines: packages/bigframes/bigframes/dataframe.py
+16-9Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2418,6 +2418,7 @@ def sort_index(
24182418
*,
24192419
ascending: bool = ...,
24202420
inplace: Literal[False] = ...,
2421+
kind: str | None = ...,
24212422
na_position: Literal["first", "last"] = ...,
24222423
) -> DataFrame: ...
24232424

@@ -2427,6 +2428,7 @@ def sort_index(
24272428
*,
24282429
ascending: bool = ...,
24292430
inplace: Literal[True] = ...,
2431+
kind: str | None = ...,
24302432
na_position: Literal["first", "last"] = ...,
24312433
) -> None: ...
24322434

@@ -2436,6 +2438,7 @@ def sort_index(
24362438
axis: Union[int, str] = 0,
24372439
ascending: bool = True,
24382440
inplace: bool = False,
2441+
kind: str | None = None,
24392442
na_position: Literal["first", "last"] = "last",
24402443
) -> Optional[DataFrame]:
24412444
if utils.get_axis_number(axis) == 0:
@@ -2449,7 +2452,10 @@ def sort_index(
24492452
else order.descending_over(column, na_last)
24502453
for column in index_columns
24512454
]
2452-
block = self._block.order_by(ordering)
2455+
is_stable = (
2456+
kind or constants.DEFAULT_SORT_KIND
2457+
) in constants.STABLE_SORT_KINDS
2458+
block = self._block.order_by(ordering, stable=is_stable)
24532459
else: # axis=1
24542460
_, indexer = self.columns.sort_values(
24552461
return_indexer=True,
@@ -2472,7 +2478,7 @@ def sort_values(
24722478
*,
24732479
inplace: Literal[False] = ...,
24742480
ascending: bool | typing.Sequence[bool] = ...,
2475-
kind: str = ...,
2481+
kind: str | None = ...,
24762482
na_position: typing.Literal["first", "last"] = ...,
24772483
) -> DataFrame: ...
24782484

@@ -2483,7 +2489,7 @@ def sort_values(
24832489
*,
24842490
inplace: Literal[True] = ...,
24852491
ascending: bool | typing.Sequence[bool] = ...,
2486-
kind: str = ...,
2492+
kind: str | None = ...,
24872493
na_position: typing.Literal["first", "last"] = ...,
24882494
) -> None: ...
24892495

@@ -2493,7 +2499,7 @@ def sort_values(
24932499
*,
24942500
inplace: bool = False,
24952501
ascending: bool | typing.Sequence[bool] = True,
2496-
kind: str = "quicksort",
2502+
kind: str | None = None,
24972503
na_position: typing.Literal["first", "last"] = "last",
24982504
) -> Optional[DataFrame]:
24992505
if isinstance(by, (bigframes.series.Series, indexes.Index, DataFrame)):
@@ -2525,7 +2531,8 @@ def sort_values(
25252531
if is_ascending
25262532
else order.descending_over(column_id, na_last)
25272533
)
2528-
block = self._block.order_by(ordering)
2534+
is_stable = (kind or constants.DEFAULT_SORT_KIND) in constants.STABLE_SORT_KINDS
2535+
block = self._block.order_by(ordering, stable=is_stable)
25292536
if inplace:
25302537
self._set_block(block)
25312538
return None
@@ -2768,11 +2775,11 @@ def replace(
27682775
):
27692776
if utils.is_dict_like(value):
27702777
return self.apply(
2771-
lambda x: x.replace(
2772-
to_replace=to_replace, value=value[x.name], regex=regex
2778+
lambda x: (
2779+
x.replace(to_replace=to_replace, value=value[x.name], regex=regex)
2780+
if (x.name in value)
2781+
else x
27732782
)
2774-
if (x.name in value)
2775-
else x
27762783
)
27772784
return self.apply(
27782785
lambda x: x.replace(to_replace=to_replace, value=value, regex=regex)
Collapse file

‎packages/bigframes/bigframes/series.py‎

Copy file name to clipboardExpand all lines: packages/bigframes/bigframes/series.py
+28-7Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1779,7 +1779,7 @@ def sort_values(
17791779
axis=...,
17801780
inplace: Literal[True] = ...,
17811781
ascending: bool | typing.Sequence[bool] = ...,
1782-
kind: str = ...,
1782+
kind: str | None = ...,
17831783
na_position: typing.Literal["first", "last"] = ...,
17841784
) -> None: ...
17851785

@@ -1790,7 +1790,7 @@ def sort_values(
17901790
axis=...,
17911791
inplace: Literal[False] = ...,
17921792
ascending: bool | typing.Sequence[bool] = ...,
1793-
kind: str = ...,
1793+
kind: str | None = ...,
17941794
na_position: typing.Literal["first", "last"] = ...,
17951795
) -> Series: ...
17961796

@@ -1800,19 +1800,21 @@ def sort_values(
18001800
axis=0,
18011801
inplace: bool = False,
18021802
ascending=True,
1803-
kind: str = "quicksort",
1803+
kind: str | None = None,
18041804
na_position: typing.Literal["first", "last"] = "last",
18051805
) -> Optional[Series]:
18061806
if axis != 0 and axis != "index":
18071807
raise ValueError(f"No axis named {axis} for object type Series")
18081808
if na_position not in ["first", "last"]:
18091809
raise ValueError("Param na_position must be one of 'first' or 'last'")
1810+
is_stable = (kind or constants.DEFAULT_SORT_KIND) in constants.STABLE_SORT_KINDS
18101811
block = self._block.order_by(
18111812
[
18121813
order.ascending_over(self._value_column, (na_position == "last"))
18131814
if ascending
18141815
else order.descending_over(self._value_column, (na_position == "last"))
18151816
],
1817+
stable=is_stable,
18161818
)
18171819
if inplace:
18181820
self._set_block(block)
@@ -1822,17 +1824,35 @@ def sort_values(
18221824

18231825
@typing.overload # type: ignore[override]
18241826
def sort_index(
1825-
self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=...
1827+
self,
1828+
*,
1829+
axis=...,
1830+
inplace: Literal[False] = ...,
1831+
ascending=...,
1832+
kind: str | None = ...,
1833+
na_position=...,
18261834
) -> Series: ...
18271835

18281836
@typing.overload
18291837
def sort_index(
1830-
self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=...
1838+
self,
1839+
*,
1840+
axis=0,
1841+
inplace: Literal[True] = ...,
1842+
ascending=...,
1843+
kind: str | None = ...,
1844+
na_position=...,
18311845
) -> None: ...
18321846

18331847
@validations.requires_index
18341848
def sort_index(
1835-
self, *, axis=0, inplace: bool = False, ascending=True, na_position="last"
1849+
self,
1850+
*,
1851+
axis=0,
1852+
inplace: bool = False,
1853+
ascending=True,
1854+
kind: str | None = None,
1855+
na_position="last",
18361856
) -> Optional[Series]:
18371857
# TODO(tbergeron): Support level parameter once multi-index introduced.
18381858
if axis != 0 and axis != "index":
@@ -1847,7 +1867,8 @@ def sort_index(
18471867
else order.descending_over(column, na_last)
18481868
for column in block.index_columns
18491869
]
1850-
block = block.order_by(ordering)
1870+
is_stable = (kind or constants.DEFAULT_SORT_KIND) in constants.STABLE_SORT_KINDS
1871+
block = block.order_by(ordering, stable=is_stable)
18511872
if inplace:
18521873
self._set_block(block)
18531874
return None
Collapse file

‎packages/bigframes/third_party/bigframes_vendored/constants.py‎

Copy file name to clipboardExpand all lines: packages/bigframes/third_party/bigframes_vendored/constants.py
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,6 @@
5555
"_deferred",
5656
]
5757
VALID_WRITE_ENGINES = typing.get_args(WriteEngineType)
58+
59+
DEFAULT_SORT_KIND = "stable"
60+
STABLE_SORT_KINDS = ("stable", "mergesort")
Collapse file

‎packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py‎

Copy file name to clipboardExpand all lines: packages/bigframes/third_party/bigframes_vendored/pandas/core/frame.py
+7-2Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2253,7 +2253,7 @@ def sort_values(
22532253
*,
22542254
inplace: bool = False,
22552255
ascending: bool | Sequence[bool] = True,
2256-
kind: str = "quicksort",
2256+
kind: str | None = None,
22572257
na_position: Literal["first", "last"] = "last",
22582258
):
22592259
"""Sort by the values along row axis.
@@ -2339,7 +2339,7 @@ def sort_values(
23392339
the by.
23402340
inplace (bool, default False):
23412341
If True, perform operation in-place.
2342-
kind (str, default 'quicksort'):
2342+
kind (str, default None):
23432343
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
23442344
'heapsort', 'stable'. Ignored except when determining whether to
23452345
sort stably. 'mergesort' or 'stable' will result in stable reorder.
@@ -2363,6 +2363,7 @@ def sort_index(
23632363
axis: str | int = 0,
23642364
ascending: bool = True,
23652365
inplace: bool = False,
2366+
kind: str | None = None,
23662367
na_position: Literal["first", "last"] = "last",
23672368
):
23682369
"""Sort object by labels (along an axis).
@@ -2375,6 +2376,10 @@ def sort_index(
23752376
Sort ascending vs. descending.
23762377
inplace (bool, default False):
23772378
Whether to modify the DataFrame rather than creating a new one.
2379+
kind (str, default None):
2380+
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
2381+
'heapsort', 'stable'. Ignored except when determining whether to
2382+
sort stably. 'mergesort' or 'stable' will result in stable reorder.
23782383
na_position ({'first', 'last'}, default 'last'):
23792384
Puts NaNs at the beginning if `first`; `last` puts NaNs at the end.
23802385
Not implemented for MultiIndex.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.