From ac4f3df170675c665e632f87aa616b3a0a3b8a79 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron
Date: Thu, 16 Nov 2023 04:20:03 +0000
Subject: [PATCH] fix: avoid unnecessary row_number() on sort key for io

The compiled to_sql() in bigframes/core/compile/compiled.py no longer
accepts an offset_column and always compiles the expression in
"unordered" mode. When sorted output is requested it exposes the hidden
ordering columns, selects only the output columns, and appends an
ORDER BY clause built from the ordering columns themselves.

Callers that need an explicit offsets column (to_sql() in
bigframes/core/__init__.py and DataFrame._create_io_query) now call
promote_offsets() first, and only when such a column is requested.

---
 bigframes/core/__init__.py         | 12 ++++++++----
 bigframes/core/compile/compiled.py | 30 +++++++++++++++++++++---------
 bigframes/dataframe.py             | 12 ++++--------
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py
index b476961bdc..e19fec8f3f 100644
--- a/bigframes/core/__init__.py
+++ b/bigframes/core/__init__.py
@@ -125,14 +125,18 @@ def to_sql(
         col_id_overrides: typing.Mapping[str, str] = {},
         sorted: bool = False,
     ) -> str:
-        if sorted or offset_column:
-            return self._compile_ordered().to_sql(
-                offset_column=offset_column,
+        array_value = self
+        if offset_column:
+            array_value = self.promote_offsets(offset_column)
+        if sorted:
+            return array_value._compile_ordered().to_sql(
                 col_id_overrides=col_id_overrides,
                 sorted=sorted,
             )
         else:
-            return self._compile_unordered().to_sql(col_id_overrides=col_id_overrides)
+            return array_value._compile_unordered().to_sql(
+                col_id_overrides=col_id_overrides
+            )
 
     def start_query(
         self,
diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py
index 78050ed4f0..461c2c005a 100644
--- a/bigframes/core/compile/compiled.py
+++ b/bigframes/core/compile/compiled.py
@@ -1031,31 +1031,43 @@ def _reproject_to_table(self) -> OrderedIR:
 
     def to_sql(
         self,
-        offset_column: typing.Optional[str] = None,
         col_id_overrides: typing.Mapping[str, str] = {},
         sorted: bool = False,
     ) -> str:
-        offsets_id = offset_column or ORDER_ID_COLUMN
-
         sql = ibis_bigquery.Backend().compile(
             self._to_ibis_expr(
-                ordering_mode="offset_col"
-                if (offset_column or sorted)
-                else "unordered",
-                order_col_name=offsets_id,
+                ordering_mode="unordered",
                 col_id_overrides=col_id_overrides,
+                expose_hidden_cols=sorted,
             )
         )
         if sorted:
+            output_columns = [
+                col_id_overrides.get(col, col)
+                for col in self.column_ids
+            ]
+            selection = ", ".join(f"`{col_id}`" for col_id in output_columns)
+            order_by_clause = self._ordering_clause(self._ordering.all_ordering_columns)
+
             sql = textwrap.dedent(
-                f"SELECT * EXCEPT (`{offsets_id}`)\n"
+                f"SELECT {selection}\n"
                 "FROM (\n"
                 f"{sql}\n"
                 ")\n"
-                f"ORDER BY `{offsets_id}`\n"
+                f"{order_by_clause}\n"
             )
         return typing.cast(str, sql)
 
+    def _ordering_clause(self, ordering: Iterable[OrderingColumnReference]) -> str:
+        """Build the SQL ORDER BY clause for the given ordering columns."""
+        parts = []
+        for col_ref in ordering:
+            asc_desc = "ASC" if col_ref.direction.is_ascending else "DESC"
+            null_clause = "NULLS LAST" if col_ref.na_last else "NULLS FIRST"
+            part = f"`{col_ref.column_id}` {asc_desc} {null_clause}"
+            parts.append(part)
+        return f"ORDER BY {', '.join(parts)}"
+
     def _to_ibis_expr(
         self,
         *,
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 57b4ca42cf..1f1275e217 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2577,14 +2577,10 @@ def _create_io_query(self, index: bool, ordering_id: Optional[str]) -> str:
         }
 
         if ordering_id is not None:
-            return array_value.to_sql(
-                offset_column=ordering_id,
-                col_id_overrides=id_overrides,
-            )
-        else:
-            return array_value.to_sql(
-                col_id_overrides=id_overrides,
-            )
+            array_value = array_value.promote_offsets(ordering_id)
+        return array_value.to_sql(
+            col_id_overrides=id_overrides,
+        )
 
     def _run_io_query(
         self,