From e92a19613b24d3f6ff33efada27325d654689664 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 5 Mar 2025 14:02:33 -0800 Subject: [PATCH 01/19] support cryptograph>=3.1 (#1454) --- bigframes/blob/_functions.py | 4 ++-- bigframes/operations/blob.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/bigframes/blob/_functions.py b/bigframes/blob/_functions.py index 480e04f02c..830bc8de06 100644 --- a/bigframes/blob/_functions.py +++ b/bigframes/blob/_functions.py @@ -393,7 +393,7 @@ def pdf_extract_func(src_obj_ref_rt: str) -> str: return all_text -pdf_extract_def = FunctionDef(pdf_extract_func, ["pypdf", "requests"]) +pdf_extract_def = FunctionDef(pdf_extract_func, ["pypdf", "requests", "pypdf[crypto]"]) # Extracts text from a PDF url and chunks it simultaneously @@ -438,4 +438,4 @@ def pdf_chunk_func(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> s return all_text_json_string -pdf_chunk_def = FunctionDef(pdf_chunk_func, ["pypdf", "requests"]) +pdf_chunk_def = FunctionDef(pdf_chunk_func, ["pypdf", "requests", "pypdf[crypto]"]) diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index 88b34bf758..6541a14655 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -553,8 +553,7 @@ def pdf_extract( container_cpu: Union[float, int] = 0.33, container_memory: str = "512Mi", ) -> bigframes.series.Series: - """Extracts and chunks text from PDF URLs and saves the text as - arrays of string. + """Extracts text from PDF URLs and saves the text as string. .. note:: BigFrames Blob is still under experiments. It may not work and From 67162834b5c7e39e430c797c055aee53e6aa6d74 Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Wed, 5 Mar 2025 15:52:25 -0800 Subject: [PATCH 02/19] chore: add experimental blob properties tests (#1449) * chore: add experimental blob properties tests * include files * fix * fix mypy * debug * fix --- tests/system/small/blob/conftest.py | 42 +++++++ tests/system/small/blob/test_io.py | 35 ++---- tests/system/small/blob/test_properties.py | 135 +++++++++++++++++++++ 3 files changed, 190 insertions(+), 22 deletions(-) create mode 100644 tests/system/small/blob/conftest.py create mode 100644 tests/system/small/blob/test_properties.py diff --git a/tests/system/small/blob/conftest.py b/tests/system/small/blob/conftest.py new file mode 100644 index 0000000000..5305acc193 --- /dev/null +++ b/tests/system/small/blob/conftest.py @@ -0,0 +1,42 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +import bigframes +import bigframes.pandas as bpd + + +@pytest.fixture(scope="session") +def images_gcs_path() -> str: + return "gs://bigframes_blob_test/images/*" + + +@pytest.fixture(scope="session") +def images_uris() -> list[str]: + return [ + "gs://bigframes_blob_test/images/img0.jpg", + "gs://bigframes_blob_test/images/img1.jpg", + ] + + +@pytest.fixture(scope="session") +def images_mm_df( + images_gcs_path, session: bigframes.Session, bq_connection: str +) -> bpd.DataFrame: + bigframes.options.experiments.blob = True + + return session.from_glob_path( + images_gcs_path, name="blob_col", connection=bq_connection + ) diff --git a/tests/system/small/blob/test_io.py b/tests/system/small/blob/test_io.py index ca068afe46..c30f7674af 100644 --- a/tests/system/small/blob/test_io.py +++ b/tests/system/small/blob/test_io.py @@ -18,21 +18,18 @@ import bigframes.pandas as bpd -def test_blob_create_from_uri_str(bq_connection: str, session: bigframes.Session): +def test_blob_create_from_uri_str( + bq_connection: str, session: bigframes.Session, images_uris +): bigframes.options.experiments.blob = True - uris = [ - "gs://bigframes_blob_test/images/img0.jpg", - "gs://bigframes_blob_test/images/img1.jpg", - ] - - uri_series = bpd.Series(uris, session=session) + uri_series = bpd.Series(images_uris, session=session) blob_series = uri_series.str.to_blob(connection=bq_connection) pd_blob_df = blob_series.struct.explode().to_pandas() expected_pd_df = pd.DataFrame( { - "uri": uris, + "uri": images_uris, "version": [None, None], "authorizer": [bq_connection.casefold(), bq_connection.casefold()], "details": [None, None], @@ -44,19 +41,18 @@ def test_blob_create_from_uri_str(bq_connection: str, session: bigframes.Session ) -def test_blob_create_from_glob_path(bq_connection: str, session: bigframes.Session): +def test_blob_create_from_glob_path( + bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris +): bigframes.options.experiments.blob = True blob_df = session.from_glob_path( - "gs://bigframes_blob_test/images/*", connection=bq_connection, name="blob_col" + images_gcs_path, connection=bq_connection, name="blob_col" ) pd_blob_df = blob_df["blob_col"].struct.explode().to_pandas() expected_df = pd.DataFrame( { - "uri": [ - "gs://bigframes_blob_test/images/img0.jpg", - "gs://bigframes_blob_test/images/img1.jpg", - ], + "uri": images_uris, "version": [None, None], "authorizer": [bq_connection.casefold(), bq_connection.casefold()], "details": [None, None], @@ -69,22 +65,17 @@ def test_blob_create_from_glob_path(bq_connection: str, session: bigframes.Sessi def test_blob_create_read_gbq_object_table( - bq_connection: str, session: bigframes.Session + bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris ): bigframes.options.experiments.blob = True - obj_table = session._create_object_table( - "gs://bigframes_blob_test/images/*", bq_connection - ) + obj_table = session._create_object_table(images_gcs_path, bq_connection) blob_df = session.read_gbq_object_table(obj_table, name="blob_col") pd_blob_df = blob_df["blob_col"].struct.explode().to_pandas() expected_df = pd.DataFrame( { - "uri": [ - "gs://bigframes_blob_test/images/img0.jpg", - "gs://bigframes_blob_test/images/img1.jpg", - ], + "uri": images_uris, "version": [None, None], "authorizer": [bq_connection.casefold(), bq_connection.casefold()], "details": [None, None], diff --git a/tests/system/small/blob/test_properties.py b/tests/system/small/blob/test_properties.py new file mode 100644 index 
0000000000..dedd1f916a --- /dev/null +++ b/tests/system/small/blob/test_properties.py @@ -0,0 +1,135 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import db_dtypes # type: ignore +import pandas as pd + +import bigframes +import bigframes.pandas as bpd + + +def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame): + bigframes.options.experiments.blob = True + + actual = images_mm_df["blob_col"].blob.uri().to_pandas() + expected = pd.Series(images_uris, name="uri") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str): + bigframes.options.experiments.blob = True + + actual = images_mm_df["blob_col"].blob.authorizer().to_pandas() + expected = pd.Series( + [bq_connection.casefold(), bq_connection.casefold()], name="authorizer" + ) + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_version(images_mm_df: bpd.DataFrame): + bigframes.options.experiments.blob = True + + actual = images_mm_df["blob_col"].blob.version().to_pandas() + expected = pd.Series(["1739574332294150", "1739574332271343"], name="version") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_metadata(images_mm_df: bpd.DataFrame): + bigframes.options.experiments.blob = True + + actual = images_mm_df["blob_col"].blob.metadata().to_pandas() + expected = pd.Series( + [ + { + "content_type": "image/jpeg", + "md5_hash": "e130ad042261a1883cd2cc06831cf748", + "size": 338390, + "updated": 1739574332000000, + }, + { + "content_type": "image/jpeg", + "md5_hash": "e2ae3191ff2b809fd0935f01a537c650", + "size": 43333, + "updated": 1739574332000000, + }, + ], + name="metadata", + dtype=db_dtypes.JSONDtype(), + ) + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_content_type(images_mm_df: bpd.DataFrame): + bigframes.options.experiments.blob = True + + actual = images_mm_df["blob_col"].blob.content_type().to_pandas() + expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_md5_hash(images_mm_df: bpd.DataFrame): + bigframes.options.experiments.blob = True + + actual = images_mm_df["blob_col"].blob.md5_hash().to_pandas() + expected = pd.Series( + ["e130ad042261a1883cd2cc06831cf748", "e2ae3191ff2b809fd0935f01a537c650"], + name="md5_hash", + ) + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_size(images_mm_df: bpd.DataFrame): + bigframes.options.experiments.blob = True + + actual = images_mm_df["blob_col"].blob.size().to_pandas() + expected = pd.Series([338390, 43333], name="size") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, 
check_index_type=False + ) + + +def test_blob_updated(images_mm_df: bpd.DataFrame): + bigframes.options.experiments.blob = True + + actual = images_mm_df["blob_col"].blob.updated().to_pandas() + expected = pd.Series( + [ + pd.Timestamp("2025-02-14 23:05:32", tz="UTC"), + pd.Timestamp("2025-02-14 23:05:32", tz="UTC"), + ], + name="updated", + ) + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) From 7b0cab5c504ec2b24ea35b29ee32901da65681b6 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 5 Mar 2025 19:42:20 -0800 Subject: [PATCH 03/19] chore: format warning message with newlines and ansi color (#1447) * use ibis fill_null instead of fillna * minor typo in JSON warning messages * chore: format warning message with newlines and ansi color --- bigframes/_config/bigquery_options.py | 8 +-- bigframes/_config/experiment_options.py | 6 +-- bigframes/core/array_value.py | 14 +++-- bigframes/core/blocks.py | 9 ++-- bigframes/core/compile/aggregate_compiler.py | 16 ++---- bigframes/core/global_session.py | 2 +- bigframes/core/indexers.py | 2 +- bigframes/core/utils.py | 2 +- bigframes/dataframe.py | 7 ++- bigframes/exceptions.py | 26 +++++++++ bigframes/functions/_function_session.py | 6 +-- bigframes/functions/function.py | 2 +- bigframes/ml/base.py | 5 +- bigframes/ml/llm.py | 54 +++++++++++-------- bigframes/ml/remote.py | 3 +- bigframes/operations/_matplotlib/core.py | 9 ++-- bigframes/operations/semantics.py | 20 +++---- bigframes/session/__init__.py | 16 +++--- .../session/_io/bigquery/read_gbq_table.py | 8 +-- bigframes/session/clients.py | 6 ++- bigframes/session/executor.py | 9 ++-- bigframes/streaming/dataframe.py | 8 ++- tests/system/large/test_dataframe_io.py | 4 +- tests/system/large/test_location.py | 6 +-- .../small/functions/test_remote_function.py | 2 +- tests/unit/_config/test_bigquery_options.py | 17 +++--- 26 files changed, 160 insertions(+), 107 deletions(-) diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 3968e98a69..84bc4f6d01 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -59,7 +59,9 @@ def _get_validated_location(value: Optional[str]) -> Optional[str]: # -> bpd.options.bigquery.location = "us-central-1" # -> location.setter # -> _get_validated_location - msg = UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility) + msg = bfe.format_message( + UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility) + ) warnings.warn(msg, stacklevel=3, category=bfe.UnknownLocationWarning) return value @@ -294,7 +296,7 @@ def use_regional_endpoints(self, value: bool): ) if value: - msg = ( + msg = bfe.format_message( "Use of regional endpoints is a feature in preview and " "available only in selected regions and projects. " ) @@ -354,7 +356,7 @@ def client_endpoints_override(self) -> dict: @client_endpoints_override.setter def client_endpoints_override(self, value: dict): - msg = ( + msg = bfe.format_message( "This is an advanced configuration option for directly setting endpoints. " "Incorrect use may lead to unexpected behavior or system instability. " "Proceed only if you fully understand its implications." 
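Every warning message touched in this commit now goes through a new bigframes.exceptions.format_message helper (its definition appears further down in this patch, in the bigframes/exceptions.py hunk). The helper re-wraps the text with textwrap.fill so long single-line messages break cleanly, and surrounds the result with ANSI escape codes so warnings stand out in a terminal; callers pass fill=False when the message already carries its own newlines, as the clients.py hunk does for the LEP deprecation message. A minimal, self-contained sketch of the pattern:

    import textwrap
    import warnings

    class ColorFormatter:
        WARNING = "\033[93m"  # bright yellow
        ENDC = "\033[0m"      # reset to the default terminal color

    def format_message(message: str, fill: bool = True) -> str:
        # Re-wrap long single-line messages, then tint the whole message yellow.
        if fill:
            message = textwrap.fill(message)
        return ColorFormatter.WARNING + message + ColorFormatter.ENDC

    warnings.warn(
        format_message(
            "Use of regional endpoints is a feature in preview and "
            "available only in selected regions and projects."
        ),
        category=UserWarning,
    )
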
diff --git a/bigframes/_config/experiment_options.py b/bigframes/_config/experiment_options.py index b958667628..3d52976004 100644 --- a/bigframes/_config/experiment_options.py +++ b/bigframes/_config/experiment_options.py @@ -34,7 +34,7 @@ def semantic_operators(self) -> bool: @semantic_operators.setter def semantic_operators(self, value: bool): if value is True: - msg = ( + msg = bfe.format_message( "Semantic operators are still under experiments, and are subject " "to change in the future." ) @@ -48,7 +48,7 @@ def blob(self) -> bool: @blob.setter def blob(self, value: bool): if value is True: - msg = ( + msg = bfe.format_message( "BigFrames Blob is still under experiments. It may not work and " "subject to change in the future." ) @@ -62,7 +62,7 @@ def udf(self) -> bool: @udf.setter def udf(self, value: bool): if value is True: - msg = ( + msg = bfe.format_message( "BigFrames managed function (udf) is still under experiments. " "It may not work and subject to change in the future." ) diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py index 9325e3e5a8..9c44255941 100644 --- a/bigframes/core/array_value.py +++ b/bigframes/core/array_value.py @@ -107,8 +107,8 @@ def from_table( if offsets_col and primary_key: raise ValueError("must set at most one of 'offests', 'primary_key'") if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names): - msg = ( - "Interpreting JSON column(s) as the `db_dtypes.dbjson` extension type is" + msg = bfe.format_message( + "Interpreting JSON column(s) as the `db_dtypes.dbjson` extension type is " "in preview; this behavior may change in future versions." ) warnings.warn(msg, bfe.PreviewWarning) @@ -232,7 +232,9 @@ def slice( self, start: Optional[int], stop: Optional[int], step: Optional[int] ) -> ArrayValue: if self.node.order_ambiguous and not (self.session._strictly_ordered): - msg = "Window ordering may be ambiguous, this can cause unstable results." + msg = bfe.format_message( + "Window ordering may be ambiguous, this can cause unstable results." + ) warnings.warn(msg, bfe.AmbiguousWindowWarning) return ArrayValue( nodes.SliceNode( @@ -254,7 +256,7 @@ def promote_offsets(self) -> Tuple[ArrayValue, str]: "Generating offsets not supported in partial ordering mode" ) else: - msg = ( + msg = bfe.format_message( "Window ordering may be ambiguous, this can cause unstable results." ) warnings.warn(msg, category=bfe.AmbiguousWindowWarning) @@ -417,7 +419,9 @@ def project_window_op( "Generating offsets not supported in partial ordering mode" ) else: - msg = "Window ordering may be ambiguous, this can cause unstable results." + msg = bfe.format_message( + "Window ordering may be ambiguous, this can cause unstable results." + ) warnings.warn(msg, category=bfe.AmbiguousWindowWarning) output_name = self._gen_namespaced_uid() diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 7ac2b03f28..b4e3ea0f86 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -64,6 +64,7 @@ import bigframes.core.utils as utils import bigframes.core.window_spec as windows import bigframes.dtypes +import bigframes.exceptions as bfe import bigframes.features import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops @@ -630,12 +631,12 @@ def _materialize_local( # Since we cannot acquire the table size without a query_job, # we skip the sampling. 
if sample_config.enable_downsampling: - warnings.warn( + msg = bfe.format_message( "Sampling is disabled and there is no download size limit when 'allow_large_results' is set to " "False. To prevent downloading excessive data, it is recommended to use the peek() method, or " - "limit the data with methods like .head() or .sample() before proceeding with downloads.", - UserWarning, + "limit the data with methods like .head() or .sample() before proceeding with downloads." ) + warnings.warn(msg, category=UserWarning) fraction = 2 # TODO: Maybe materialize before downsampling @@ -652,7 +653,7 @@ def _materialize_local( " # Setting it to None will download all the data\n" f"{constants.FEEDBACK_LINK}" ) - msg = ( + msg = bfe.format_message( f"The data size ({table_mb:.2f} MB) exceeds the maximum download limit of" f"({max_download_size} MB). It will be downsampled to {max_download_size} " "MB for download.\nPlease refer to the documentation for configuring " diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py index edf1e14b3a..93fddf196e 100644 --- a/bigframes/core/compile/aggregate_compiler.py +++ b/bigframes/core/compile/aggregate_compiler.py @@ -165,7 +165,7 @@ def _( ) -> ibis_types.NumericValue: # Will be null if all inputs are null. Pandas defaults to zero sum though. bq_sum = _apply_window_if_present(column.sum(), window) - return bq_sum.fillna(ibis_types.literal(0)) + return bq_sum.fill_null(ibis_types.literal(0)) @compile_unary_agg.register @@ -610,12 +610,7 @@ def _( result = _apply_window_if_present(_is_true(column).all(), window) literal = ibis_types.literal(True) - return cast( - ibis_types.BooleanScalar, - result.fill_null(literal) - if hasattr(result, "fill_null") - else result.fillna(literal), - ) + return cast(ibis_types.BooleanScalar, result.fill_null(literal)) @compile_unary_agg.register @@ -628,12 +623,7 @@ def _( result = _apply_window_if_present(_is_true(column).any(), window) literal = ibis_types.literal(False) - return cast( - ibis_types.BooleanScalar, - result.fill_null(literal) - if hasattr(result, "fill_null") - else result.fillna(literal), - ) + return cast(ibis_types.BooleanScalar, result.fill_null(literal)) @compile_ordered_unary_agg.register diff --git a/bigframes/core/global_session.py b/bigframes/core/global_session.py index 8b32fee5b4..d4d70f5a06 100644 --- a/bigframes/core/global_session.py +++ b/bigframes/core/global_session.py @@ -39,7 +39,7 @@ def _try_close_session(session: bigframes.session.Session): session_id = session.session_id location = session._location project_id = session._project - msg = ( + msg = bfe.format_message( f"Session cleanup failed for session with id: {session_id}, " f"location: {location}, project: {project_id}" ) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index 97115a3ed0..c0c4d9ec11 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -407,7 +407,7 @@ def _struct_accessor_check_and_warn( return if not bigframes.dtypes.is_string_like(series.index.dtype): - msg = ( + msg = bfe.format_message( "Are you trying to access struct fields? If so, please use Series.struct.field(...) " "method instead." 
) diff --git a/bigframes/core/utils.py b/bigframes/core/utils.py index 18061dca18..e38c43e73e 100644 --- a/bigframes/core/utils.py +++ b/bigframes/core/utils.py @@ -196,7 +196,7 @@ def decorator(func): @functools.wraps(func) def wrapper(*args, **kwargs): - warnings.warn(msg, category=bfe.PreviewWarning) + warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning) return func(*args, **kwargs) return wrapper diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index b5174dbd3e..a48e06d86c 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -1581,7 +1581,10 @@ def to_arrow( Returns: pyarrow.Table: A pyarrow Table with all rows and columns of this DataFrame. """ - msg = "to_arrow is in preview. Types and unnamed / duplicate name columns may change in future." + msg = bfe.format_message( + "to_arrow is in preview. Types and unnamed or duplicate name columns may " + "change in future." + ) warnings.warn(msg, category=bfe.PreviewWarning) pa_table, query_job = self._block.to_arrow( @@ -4104,7 +4107,7 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): # to the applied function should be a Series, not a scalar. if utils.get_axis_number(axis) == 1: - msg = "axis=1 scenario is in preview." + msg = bfe.format_message("axis=1 scenario is in preview.") warnings.warn(msg, category=bfe.PreviewWarning) # TODO(jialuo): Deprecate the "bigframes_remote_function" attribute. diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py index 97e2da40a1..8b35d9122b 100644 --- a/bigframes/exceptions.py +++ b/bigframes/exceptions.py @@ -14,6 +14,8 @@ """Public exceptions and warnings used across BigQuery DataFrames.""" +import textwrap + # NOTE: This module should not depend on any others in the package. @@ -87,3 +89,27 @@ class ApiDeprecationWarning(FutureWarning): class BadIndexerKeyWarning(Warning): """The indexer key is not used correctly.""" + + +class ColorFormatter: + WARNING = "\033[93m" + ENDC = "\033[0m" + + +def format_message(message: str, fill: bool = True): + """Formats a warning message with ANSI color codes for the warning color. + + Args: + message: The warning message string. + fill: Whether to wrap the message text using `textwrap.fill`. + Defaults to True. Set to False to prevent wrapping, + especially if the message already contains newlines. + + Returns: + The formatted message string, with ANSI color codes for warning color + if color is supported, otherwise the original message. If `fill` is + True, the message will be wrapped to fit the terminal width. + """ + if fill: + message = textwrap.fill(message) + return ColorFormatter.WARNING + message + ColorFormatter.ENDC diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 20dcf45103..ce0ade26ff 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -489,7 +489,7 @@ def remote_function( if cloud_function_ingress_settings is None: cloud_function_ingress_settings = "all" - msg = ( + msg = bfe.format_message( "The `cloud_function_ingress_settings` are set to 'all' by default, " "which will change to 'internal-only' for enhanced security in future version 2.0 onwards. " "However, you will be able to explicitly pass cloud_function_ingress_settings='all' if you need. " @@ -549,7 +549,7 @@ def wrapper(func): (input_type := input_types[0]) == bf_series.Series or input_type == pandas.Series ): - msg = "input_types=Series is in preview." 
+ msg = bfe.format_message("input_types=Series is in preview.") warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning) # we will model the row as a json serialized string containing the data @@ -836,7 +836,7 @@ def wrapper(func): (input_type := input_types[0]) == bf_series.Series or input_type == pandas.Series ): - msg = "input_types=Series is in preview." + msg = bfe.format_message("input_types=Series is in preview.") warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning) # we will model the row as a json serialized string containing diff --git a/bigframes/functions/function.py b/bigframes/functions/function.py index 392a209714..16416eb864 100644 --- a/bigframes/functions/function.py +++ b/bigframes/functions/function.py @@ -231,7 +231,7 @@ def func(*bigframes_args, **bigframes_kwargs): ) function_input_dtypes.append(input_dtype) if has_unknown_dtypes: - msg = ( + msg = bfe.format_message( "The function has one or more missing input data types. BigQuery DataFrames " f"will assume default data type {bigframes.dtypes.DEFAULT_DTYPE} for them." ) diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py index c353e47f3a..a0800c19e6 100644 --- a/bigframes/ml/base.py +++ b/bigframes/ml/base.py @@ -27,6 +27,7 @@ import bigframes_vendored.sklearn.base +import bigframes.exceptions as bfe from bigframes.ml import core import bigframes.ml.utils as utils import bigframes.pandas as bpd @@ -269,7 +270,7 @@ def _predict_and_retry( if df_succ.empty: if max_retries > 0: - msg = "Can't make any progress, stop retrying." + msg = bfe.format_message("Can't make any progress, stop retrying.") warnings.warn(msg, category=RuntimeWarning) break @@ -281,7 +282,7 @@ def _predict_and_retry( break if not df_fail.empty: - msg = ( + msg = bfe.format_message( f"Some predictions failed. Check column {self._status_col} for detailed " "status. You may want to filter the failed rows and retry." ) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 72c49e124b..0117444f16 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -189,9 +189,11 @@ def _create_bqml_model(self): ) if self.model_name not in _TEXT_GENERATOR_ENDPOINTS: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_TEXT_GENERATOR_ENDPOINTS), + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_TEXT_GENERATOR_ENDPOINTS), + ) ) warnings.warn(msg) @@ -368,7 +370,7 @@ def predict( df = self._bqml_model.generate_text(X, options) if (df[_ML_GENERATE_TEXT_STATUS] != "").any(): - msg = ( + msg = exceptions.format_message( f"Some predictions failed. Check column {_ML_GENERATE_TEXT_STATUS} for " "detailed status. You may want to filter the failed rows and retry." ) @@ -522,9 +524,11 @@ def _create_bqml_model(self): ) if self.model_name not in _PALM2_EMBEDDING_GENERATOR_ENDPOINTS: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_PALM2_EMBEDDING_GENERATOR_ENDPOINTS), + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_PALM2_EMBEDDING_GENERATOR_ENDPOINTS), + ) ) warnings.warn(msg) @@ -598,7 +602,7 @@ def predict(self, X: utils.ArrayType) -> bigframes.dataframe.DataFrame: ) if (df[_ML_EMBED_TEXT_STATUS] != "").any(): - msg = ( + msg = exceptions.format_message( f"Some predictions failed. Check column {_ML_EMBED_TEXT_STATUS} for " "detailed status. You may want to filter the failed rows and retry." 
) @@ -666,9 +670,11 @@ def _create_bqml_model(self): ) if self.model_name not in _TEXT_EMBEDDING_ENDPOINTS: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_TEXT_EMBEDDING_ENDPOINTS), + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_TEXT_EMBEDDING_ENDPOINTS), + ) ) warnings.warn(msg) @@ -805,9 +811,11 @@ def _create_bqml_model(self): ) if self.model_name != _MULTIMODAL_EMBEDDING_001_ENDPOINT: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=_MULTIMODAL_EMBEDDING_001_ENDPOINT, + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=_MULTIMODAL_EMBEDDING_001_ENDPOINT, + ) ) warnings.warn(msg) @@ -952,7 +960,7 @@ def __init__( max_iterations: int = 300, ): if model_name in _GEMINI_PREVIEW_ENDPOINTS: - msg = ( + msg = exceptions.format_message( f'Model {model_name} is subject to the "Pre-GA Offerings Terms" in ' "the General Service Terms section of the Service Specific Terms" "(https://cloud.google.com/terms/service-terms#1). Pre-GA products and " @@ -976,9 +984,11 @@ def _create_bqml_model(self): ) if self.model_name not in _GEMINI_ENDPOINTS: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_GEMINI_ENDPOINTS), + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_GEMINI_ENDPOINTS), + ) ) warnings.warn(msg) @@ -1343,9 +1353,11 @@ def _create_bqml_model(self): ) if self.model_name not in _CLAUDE_3_ENDPOINTS: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_CLAUDE_3_ENDPOINTS), + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_CLAUDE_3_ENDPOINTS), + ) ) warnings.warn(msg) options = { diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py index 6ee6840656..cc711cbe3b 100644 --- a/bigframes/ml/remote.py +++ b/bigframes/ml/remote.py @@ -21,6 +21,7 @@ from bigframes.core import global_session, log_adapter import bigframes.dataframe +import bigframes.exceptions as bfe from bigframes.ml import base, core, globals, utils import bigframes.session @@ -119,7 +120,7 @@ def predict( # unlike LLM models, the general remote model status is null for successful runs. if (df[_REMOTE_MODEL_STATUS].notna()).any(): - msg = ( + msg = bfe.format_message( f"Some predictions failed. Check column {_REMOTE_MODEL_STATUS} for " "detailed status. You may want to filter the failed rows and retry." 
) diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index 9c68a2c5ca..a5f53b9f64 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -20,6 +20,7 @@ import pandas as pd import bigframes.dtypes as dtypes +import bigframes.exceptions as bfe DEFAULT_SAMPLING_N = 1000 DEFAULT_SAMPLING_STATE = 0 @@ -70,10 +71,12 @@ def _compute_sample_data(self, data): if self._sampling_warning_msg is not None: total_n = data.shape[0] if sampling_n < total_n: - msg = self._sampling_warning_msg.format( - sampling_n=sampling_n, total_n=total_n + msg = bfe.format_message( + self._sampling_warning_msg.format( + sampling_n=sampling_n, total_n=total_n + ) ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) sampling_random_state = self.kwargs.pop( "sampling_random_state", DEFAULT_SAMPLING_STATE diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py index 3b7a77e5b7..686db50a43 100644 --- a/bigframes/operations/semantics.py +++ b/bigframes/operations/semantics.py @@ -141,11 +141,11 @@ def agg( column = columns[0] if ground_with_google_search: - msg = ( + msg = exceptions.format_message( "Enables Grounding with Google Search may impact billing cost. See pricing " "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) user_instruction = self._format_instruction(instruction, columns) @@ -372,11 +372,11 @@ def filter(self, instruction: str, model, ground_with_google_search: bool = Fals raise ValueError(f"Column {column} not found.") if ground_with_google_search: - msg = ( + msg = exceptions.format_message( "Enables Grounding with Google Search may impact billing cost. See pricing " "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) self._confirm_operation(len(self._df)) @@ -471,11 +471,11 @@ def map( raise ValueError(f"Column {column} not found.") if ground_with_google_search: - msg = ( + msg = exceptions.format_message( "Enables Grounding with Google Search may impact billing cost. See pricing " "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) self._confirm_operation(len(self._df)) @@ -573,11 +573,11 @@ def join( columns = self._parse_columns(instruction) if ground_with_google_search: - msg = ( + msg = exceptions.format_message( "Enables Grounding with Google Search may impact billing cost. See pricing " "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) work_estimate = len(self._df) * len(other) self._confirm_operation(work_estimate) @@ -816,11 +816,11 @@ def top_k( ) if ground_with_google_search: - msg = ( + msg = exceptions.format_message( "Enables Grounding with Google Search may impact billing cost. 
See pricing " "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) work_estimate = int(len(self._df) * (len(self._df) - 1) / 2) self._confirm_operation(work_estimate) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 13e49fca42..3f081e2177 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -152,7 +152,9 @@ def __init__( if context.location is None: self._location = "US" - msg = f"No explicit location is set, so using location {self._location} for the session." + msg = bfe.format_message( + f"No explicit location is set, so using location {self._location} for the session." + ) # User's code # -> get_global_session() # -> connect() @@ -344,25 +346,25 @@ def _project(self): @property def bytes_processed_sum(self): """The sum of all bytes processed by bigquery jobs using this session.""" - warnings.warn( + msg = bfe.format_message( "Queries executed with `allow_large_results=False` within the session will not " "have their bytes processed counted in this sum. If you need precise " "bytes processed information, query the `INFORMATION_SCHEMA` tables " "to get relevant metrics.", - UserWarning, ) + warnings.warn(msg, UserWarning) return self._metrics.bytes_processed @property def slot_millis_sum(self): """The sum of all slot time used by bigquery jobs in this session.""" - warnings.warn( + msg = bfe.format_message( "Queries executed with `allow_large_results=False` within the session will not " "have their slot milliseconds counted in this sum. If you need precise slot " "milliseconds information, query the `INFORMATION_SCHEMA` tables " "to get relevant metrics.", - UserWarning, ) + warnings.warn(msg, UserWarning) return self._metrics.slot_millis @property @@ -612,7 +614,9 @@ def read_gbq_table_streaming( bigframes.streaming.dataframe.StreamingDataFrame: A StreamingDataFrame representing results of the table. """ - msg = "The bigframes.streaming module is a preview feature, and subject to change." + msg = bfe.format_message( + "The bigframes.streaming module is a preview feature, and subject to change." + ) warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning) import bigframes.streaming.dataframe as streaming_dataframe diff --git a/bigframes/session/_io/bigquery/read_gbq_table.py b/bigframes/session/_io/bigquery/read_gbq_table.py index ed68762ee8..9fa97cb6e1 100644 --- a/bigframes/session/_io/bigquery/read_gbq_table.py +++ b/bigframes/session/_io/bigquery/read_gbq_table.py @@ -59,7 +59,7 @@ def get_table_metadata( # Cache hit could be unexpected. See internal issue 329545805. # Raise a warning with more information about how to avoid the # problems with the cache. - msg = ( + msg = bfe.format_message( f"Reading cached table from {snapshot_timestamp} to avoid " "incompatibilies with previous reads of this table. To read " "the latest version, set `use_cache=False` or close the " @@ -104,7 +104,7 @@ def validate_table( # Only true tables support time travel elif table.table_type != "TABLE": if table.table_type == "MATERIALIZED_VIEW": - msg = ( + msg = bfe.format_message( "Materialized views do not support FOR SYSTEM_TIME AS OF queries. " "Attempting query without time travel. 
Be aware that as materialized views " "are updated periodically, modifications to the underlying data in the view may " @@ -142,7 +142,7 @@ def validate_table( snapshot_sql, job_config=bigquery.QueryJobConfig(dry_run=True) ) if time_travel_not_found: - msg = ( + msg = bfe.format_message( "NotFound error when reading table with time travel." " Attempting query without time travel. Warning: Without" " time travel, modifications to the underlying table may" @@ -269,7 +269,7 @@ def get_index_cols( # resource utilization because of the default sequential index. See # internal issue 335727141. if _is_table_clustered_or_partitioned(table) and not primary_keys: - msg = ( + msg = bfe.format_message( f"Table '{str(table.reference)}' is clustered and/or " "partitioned, but BigQuery DataFrames was not able to find a " "suitable index. To avoid this warning, set at least one of: " diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index fd8f387c3d..5b707ad478 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -32,6 +32,7 @@ import pydata_google_auth import bigframes.constants +import bigframes.exceptions as bfe import bigframes.version _ENV_DEFAULT_PROJECT = "GOOGLE_CLOUD_PROJECT" @@ -102,12 +103,13 @@ def __init__( and location.lower() not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS ): - warnings.warn( + msg = bfe.format_message( bigframes.constants.LEP_DEPRECATION_WARNING_MESSAGE.format( location=location ), - category=FutureWarning, + fill=False, ) + warnings.warn(msg, category=FutureWarning) self._location = location self._use_regional_endpoints = use_regional_endpoints diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index 22d1c1dcea..0644b0e6d9 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -48,6 +48,7 @@ import bigframes.core.schema import bigframes.core.tree_properties as tree_properties import bigframes.dtypes +import bigframes.exceptions as bfe import bigframes.features import bigframes.session._io.bigquery as bq_io import bigframes.session.metrics @@ -271,13 +272,13 @@ def iterator_supplier(): size_bytes = None if size_bytes is not None and size_bytes >= MAX_SMALL_RESULT_BYTES: - warnings.warn( + msg = bfe.format_message( "The query result size has exceeded 10 GB. In BigFrames 2.0 and " "later, you might need to manually set `allow_large_results=True` in " "the IO method or adjust the BigFrames option: " - "`bigframes.options.bigquery.allow_large_results=True`.", - FutureWarning, + "`bigframes.options.bigquery.allow_large_results=True`." ) + warnings.warn(msg, FutureWarning) # Runs strict validations to ensure internal type predictions and ibis are completely in sync # Do not execute these validations outside of testing suite. if "PYTEST_CURRENT_TEST" in os.environ: @@ -383,7 +384,7 @@ def peek( """ plan = self.replace_cached_subtrees(array_value.node) if not tree_properties.can_fast_peek(plan): - msg = "Peeking this value cannot be done efficiently." + msg = bfe.format_message("Peeking this value cannot be done efficiently.") warnings.warn(msg) if use_explicit_destination is None: use_explicit_destination = bigframes.options.bigquery.allow_large_results diff --git a/bigframes/streaming/dataframe.py b/bigframes/streaming/dataframe.py index 2180a66207..4acefd6283 100644 --- a/bigframes/streaming/dataframe.py +++ b/bigframes/streaming/dataframe.py @@ -372,7 +372,9 @@ def _to_bigtable( For example, the job can be cancelled or its error status can be examined. 
""" - msg = "The bigframes.streaming module is a preview feature, and subject to change." + msg = bfe.format_message( + "The bigframes.streaming module is a preview feature, and subject to change." + ) warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning) # get default client if not passed @@ -484,7 +486,9 @@ def _to_pubsub( For example, the job can be cancelled or its error status can be examined. """ - msg = "The bigframes.streaming module is a preview feature, and subject to change." + msg = bfe.format_message( + "The bigframes.streaming module is a preview feature, and subject to change." + ) warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning) # get default client if not passed diff --git a/tests/system/large/test_dataframe_io.py b/tests/system/large/test_dataframe_io.py index c055babce6..76a7001fe3 100644 --- a/tests/system/large/test_dataframe_io.py +++ b/tests/system/large/test_dataframe_io.py @@ -46,9 +46,7 @@ def test_to_pandas_batches_override_global_option( ) assert len(w) == 2 assert issubclass(w[0].category, FutureWarning) - assert str(w[0].message).startswith( - "The query result size has exceeded 10 GB." - ) + assert "The query result size has exceeded 10 GB." in str(w[0].message) def test_to_pandas_raise_when_large_result_not_allowed(session): diff --git a/tests/system/large/test_location.py b/tests/system/large/test_location.py index 0b4a7afe2b..7801f5dada 100644 --- a/tests/system/large/test_location.py +++ b/tests/system/large/test_location.py @@ -163,11 +163,7 @@ def test_bq_lep_endpoints(bigquery_location): location=bigquery_location, use_regional_endpoints=True ) assert len(record) == 1 - assert typing.cast(Warning, record[0].message).args[ - 0 - ] == bigframes.constants.LEP_DEPRECATION_WARNING_MESSAGE.format( - location=bigquery_location - ) + assert bigquery_location in typing.cast(Warning, record[0].message).args[0] # Verify that location and endpoints are correctly set for the BigQuery API # client diff --git a/tests/system/small/functions/test_remote_function.py b/tests/system/small/functions/test_remote_function.py index c12d0e03f5..075a57f23d 100644 --- a/tests/system/small/functions/test_remote_function.py +++ b/tests/system/small/functions/test_remote_function.py @@ -929,7 +929,7 @@ def test_read_gbq_function_requires_explicit_types( ) with pytest.warns( bigframes.exceptions.UnknownDataTypeWarning, - match="missing input data types.*assume default data type", + match=r"missing input data types[\s\S]*assume default data type", ): bff.read_gbq_function( str(only_return_type_specified.reference), diff --git a/tests/unit/_config/test_bigquery_options.py b/tests/unit/_config/test_bigquery_options.py index 31f43ffee5..98a74d4e4c 100644 --- a/tests/unit/_config/test_bigquery_options.py +++ b/tests/unit/_config/test_bigquery_options.py @@ -164,14 +164,19 @@ def set_location_property(): options.location = invalid_location for op in [set_location_in_constructor, set_location_property]: - with pytest.warns( - bigframes.exceptions.UnknownLocationWarning, - match=re.escape( - f"The location '{invalid_location}' is set to an unknown value. Did you mean '{possibility}'?" - ), - ): + with warnings.catch_warnings(record=True) as w: op() + assert issubclass( + w[0].category, bigframes.exceptions.UnknownLocationWarning + ) + assert ( + f"The location '{invalid_location}' is set to an unknown value. " + in str(w[0].message) + ) + # The message might contain newlines added by textwrap.fill. 
+ assert possibility in str(w[0].message).replace("\n", "") + def test_client_endpoints_override_set_shows_warning(): options = bigquery_options.BigQueryOptions() From 024113942aed1e0dcfa3877378fe729b29044155 Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Thu, 6 Mar 2025 10:10:31 -0800 Subject: [PATCH 04/19] chore: add experimental blob url tests (#1463) * chore: add experimental blob url tests * fix --- tests/system/small/blob/test_urls.py | 32 ++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/system/small/blob/test_urls.py diff --git a/tests/system/small/blob/test_urls.py b/tests/system/small/blob/test_urls.py new file mode 100644 index 0000000000..da972348f2 --- /dev/null +++ b/tests/system/small/blob/test_urls.py @@ -0,0 +1,32 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import bigframes +import bigframes.pandas as bpd + + +def test_blob_read_url(images_mm_df: bpd.DataFrame): + bigframes.options.experiments.blob = True + + urls = images_mm_df["blob_col"].blob.read_url() + + assert urls.str.startswith("https://storage.googleapis.com/").all() + + +def test_blob_write_url(images_mm_df: bpd.DataFrame): + bigframes.options.experiments.blob = True + + urls = images_mm_df["blob_col"].blob.write_url() + + assert urls.str.startswith("https://storage.googleapis.com/").all() From fe72ada9cebb32947560c97567d7937c8b618f0d Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Thu, 6 Mar 2025 10:46:00 -0800 Subject: [PATCH 05/19] fix: Fix list-like indexers in partial ordering mode (#1456) --- bigframes/core/blocks.py | 20 ++++++++++++++++---- bigframes/core/indexers.py | 12 ++++++++++-- bigframes/dataframe.py | 10 ++++++++-- tests/system/conftest.py | 10 ++++++++++ tests/system/small/test_dataframe.py | 14 ++++++++++++++ 5 files changed, 58 insertions(+), 8 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index b4e3ea0f86..66d9d6772f 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -2325,6 +2325,7 @@ def _apply_binop( return self.project_exprs(exprs, labels=labels, drop=True) + # TODO: Re-implement join in terms of merge (requires also adding remaining merge args) def join( self, other: Block, @@ -2332,6 +2333,7 @@ def join( how="left", sort: bool = False, block_identity_join: bool = False, + always_order: bool = False, ) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]: """ Join two blocks objects together, and provide mappings between source columns and output columns. @@ -2345,6 +2347,8 @@ def join( if true will sort result by index block_identity_join (bool): If true, will not convert join to a projection (implicitly assuming unique indices) + always_order (bool): + If true, will always preserve input ordering, even if ordering mode is partial Returns: Block, (left_mapping, right_mapping): Result block and mappers from input column ids to result column ids. 
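The new always_order flag is what makes list-like indexers reliable under partial ordering: _perform_loc_list_join and the iloc path in indexers.py (changed later in this patch) request always_order=True so the right join preserves the caller's input order instead of depending on a total ordering that a partially ordered session cannot provide. A usage sketch of the behavior pinned down by the new test_iloc_list_partial_ordering system test; the table path below is a placeholder:

    import bigframes.pandas as bpd

    # Run the session in partial ordering mode.
    bpd.options.bigquery.ordering_mode = "partial"

    # Placeholder table; any table with a usable index column works.
    df = bpd.read_gbq(
        "my-project.my_dataset.scalars", index_col="rowindex"
    ).sort_index()

    # Duplicate and out-of-order positions come back in the requested order,
    # because the underlying right join now propagates input ordering.
    print(df.iloc[[0, 0, 0, 5, 4, 7]].to_pandas())
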
@@ -2390,10 +2394,14 @@ def join( self._throw_if_null_index("join") other._throw_if_null_index("join") if self.index.nlevels == other.index.nlevels == 1: - return join_mono_indexed(self, other, how=how, sort=sort) + return join_mono_indexed( + self, other, how=how, sort=sort, propogate_order=always_order + ) else: # Handles cases where one or both sides are multi-indexed # Always sort mult-index join - return join_multi_indexed(self, other, how=how, sort=sort) + return join_multi_indexed( + self, other, how=how, sort=sort, propogate_order=always_order + ) def is_monotonic_increasing( self, column_id: typing.Union[str, Sequence[str]] @@ -2850,7 +2858,8 @@ def join_mono_indexed( right: Block, *, how="left", - sort=False, + sort: bool = False, + propogate_order: bool = False, ) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]: left_expr = left.expr right_expr = right.expr @@ -2861,6 +2870,7 @@ def join_mono_indexed( conditions=( join_defs.JoinCondition(left.index_columns[0], right.index_columns[0]), ), + propogate_order=propogate_order, ) left_index = get_column_left[left.index_columns[0]] @@ -2895,7 +2905,8 @@ def join_multi_indexed( right: Block, *, how="left", - sort=False, + sort: bool = False, + propogate_order: bool = False, ) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]: if not (left.index.is_uniquely_named() and right.index.is_uniquely_named()): raise ValueError("Joins not supported on indices with non-unique level names") @@ -2924,6 +2935,7 @@ def join_multi_indexed( join_defs.JoinCondition(left, right) for left, right in zip(left_join_ids, right_join_ids) ), + propogate_order=propogate_order, ) left_ids_post_join = [get_column_left[id] for id in left_join_ids] diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index c0c4d9ec11..d1a0c42e97 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -379,12 +379,14 @@ def _perform_loc_list_join( result = typing.cast( bigframes.series.Series, series_or_dataframe.to_frame()._perform_join_by_index( - keys_index, how="right" + keys_index, how="right", always_order=True )[name], ) result = result.rename(original_name) else: - result = series_or_dataframe._perform_join_by_index(keys_index, how="right") + result = series_or_dataframe._perform_join_by_index( + keys_index, how="right", always_order=True + ) if drop_levels and series_or_dataframe.index.nlevels > keys_index.nlevels: # drop common levels @@ -492,6 +494,12 @@ def _iloc_getitem_series_or_dataframe( # set to offset index and use regular loc, then restore index df = df.reset_index(drop=False) + block = df._block + # explicitly set index to offsets, reset_index may not generate offsets in some modes + block, offsets_id = block.promote_offsets("temp_iloc_offsets_") + block = block.set_index([offsets_id]) + df = bigframes.dataframe.DataFrame(block) + result = df.loc[key] result = result.set_index(temporary_index_names) result = result.rename_axis(original_index_names) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index a48e06d86c..151da51792 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -3238,9 +3238,15 @@ def join( return left._perform_join_by_index(right, how=how) def _perform_join_by_index( - self, other: Union[DataFrame, indexes.Index], *, how: str = "left" + self, + other: Union[DataFrame, indexes.Index], + *, + how: str = "left", + always_order: bool = False, ): - block, _ = self._block.join(other._block, how=how, block_identity_join=True) + block, _ = self._block.join( + 
other._block, how=how, block_identity_join=True, always_order=always_order + ) return DataFrame(block) @validations.requires_ordering() diff --git a/tests/system/conftest.py b/tests/system/conftest.py index d40d0e0eef..5b3add053c 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -544,6 +544,16 @@ def scalars_df_index( return session.read_gbq(scalars_table_id, index_col="rowindex") +@pytest.fixture(scope="session") +def scalars_df_partial_ordering( + scalars_table_id: str, unordered_session: bigframes.Session +) -> bigframes.dataframe.DataFrame: + """DataFrame pointing at test data.""" + return unordered_session.read_gbq( + scalars_table_id, index_col="rowindex" + ).sort_index() + + @pytest.fixture(scope="session") def scalars_df_null_index( scalars_table_id: str, session: bigframes.Session diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index db777137b0..f80b811217 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -4418,6 +4418,20 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index): ) +def test_iloc_list_partial_ordering( + scalars_df_partial_ordering, scalars_pandas_df_index +): + index_list = [0, 0, 0, 5, 4, 7] + + bf_result = scalars_df_partial_ordering.iloc[index_list] + pd_result = scalars_pandas_df_index.iloc[index_list] + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + def test_iloc_list_multiindex(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs scalars_df = scalars_df.copy() From 7b6e3615f8d4531beb4b59ca1223927112e713da Mon Sep 17 00:00:00 2001 From: jialuoo Date: Thu, 6 Mar 2025 13:01:37 -0800 Subject: [PATCH 06/19] fix: fix the merge issue between 1424 and 1373 (#1461) * fix: fix the merge issue between 1424 and 1373 * Update _function_session.py --- bigframes/functions/_function_session.py | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index ce0ade26ff..15c8cb979e 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -47,6 +47,7 @@ ) from bigframes import clients +from bigframes import version as bigframes_version import bigframes.core.compile.ibis_types import bigframes.exceptions as bfe import bigframes.series as bf_series @@ -265,6 +266,13 @@ def remote_function( .. deprecated:: 0.0.1 This is an internal method. Please use :func:`bigframes.pandas.remote_function` instead. + .. warning:: + To use remote functions with Bigframes 2.0 and onwards, please (preferred) + set an explicit user-managed ``cloud_function_service_account`` or (discouraged) + set ``cloud_function_service_account`` to use the Compute Engine service account + by setting it to `"default"`. + See, https://cloud.google.com/functions/docs/securing/function-identity. + .. note:: Please make sure following is setup before using this API: @@ -445,6 +453,26 @@ def remote_function( # Some defaults may be used from the session if not provided otherwise. session = self._resolve_session(session) + # raise a UserWarning if user does not explicitly set cloud_function_service_account to a + # user-managed cloud_function_service_account of to default + msg = bfe.format_message( + "You have not explicitly set a user-managed `cloud_function_service_account`. " + "Using the default Compute Engine service account. 
" + "To use Bigframes 2.0, please explicitly set `cloud_function_service_account` " + 'either to a user-managed service account (preferred) or to `"default"` ' + "to use the Compute Engine service account (discouraged). " + "See, https://cloud.google.com/functions/docs/securing/function-identity." + ) + + if ( + bigframes_version.__version__.startswith("1.") + and cloud_function_service_account is None + ): + warnings.warn(msg, stacklevel=2, category=FutureWarning) + + if cloud_function_service_account == "default": + cloud_function_service_account = None + # A BigQuery client is required to perform BQ operations. bigquery_client = self._resolve_bigquery_client(session, bigquery_client) From f3fadd780d7a786b6924e887bcb4b1e8f973c11b Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Thu, 6 Mar 2025 15:11:01 -0800 Subject: [PATCH 07/19] chore: add experimental blob functions retry and timeout (#1469) * chore: add experimental blob functions retry and timeout * fix --- bigframes/blob/_functions.py | 57 +++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/bigframes/blob/_functions.py b/bigframes/blob/_functions.py index 830bc8de06..a3e7ae153c 100644 --- a/bigframes/blob/_functions.py +++ b/bigframes/blob/_functions.py @@ -112,6 +112,10 @@ def image_blur_func( import cv2 as cv # type: ignore import numpy as np import requests + from requests import adapters + + session = requests.Session() + session.mount("https://", adapters.HTTPAdapter(max_retries=3)) ext = ext or ".jpeg" @@ -121,7 +125,7 @@ def image_blur_func( src_url = src_obj_ref_rt_json["access_urls"]["read_url"] dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - response = requests.get(src_url) + response = session.get(src_url, timeout=30) bts = response.content nparr = np.frombuffer(bts, np.uint8) @@ -135,12 +139,13 @@ def image_blur_func( ext = ext_mappings.get(ext, ext) content_type = "image/" + ext - requests.put( + session.put( url=dst_url, data=bts, headers={ "Content-Type": content_type, }, + timeout=30, ) return dst_obj_ref_rt @@ -157,13 +162,17 @@ def image_blur_to_bytes_func( import cv2 as cv # type: ignore import numpy as np import requests + from requests import adapters + + session = requests.Session() + session.mount("https://", adapters.HTTPAdapter(max_retries=3)) ext = ext or ".jpeg" src_obj_ref_rt_json = json.loads(src_obj_ref_rt) src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - response = requests.get(src_url) + response = session.get(src_url, timeout=30) bts = response.content nparr = np.frombuffer(bts, np.uint8) @@ -193,6 +202,10 @@ def image_resize_func( import cv2 as cv # type: ignore import numpy as np import requests + from requests import adapters + + session = requests.Session() + session.mount("https://", adapters.HTTPAdapter(max_retries=3)) ext = ext or ".jpeg" @@ -202,7 +215,7 @@ def image_resize_func( src_url = src_obj_ref_rt_json["access_urls"]["read_url"] dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - response = requests.get(src_url) + response = session.get(src_url, timeout=30) bts = response.content nparr = np.frombuffer(bts, np.uint8) @@ -216,12 +229,13 @@ def image_resize_func( ext = ext_mappings.get(ext, ext) content_type = "image/" + ext - requests.put( + session.put( url=dst_url, data=bts, headers={ "Content-Type": content_type, }, + timeout=30, ) return dst_obj_ref_rt @@ -245,13 +259,17 @@ def image_resize_to_bytes_func( import cv2 as cv # type: ignore import numpy as np import 
requests + from requests import adapters + + session = requests.Session() + session.mount("https://", adapters.HTTPAdapter(max_retries=3)) ext = ext or ".jpeg" src_obj_ref_rt_json = json.loads(src_obj_ref_rt) src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - response = requests.get(src_url) + response = session.get(src_url, timeout=30) bts = response.content nparr = np.frombuffer(bts, np.uint8) @@ -280,6 +298,10 @@ def image_normalize_func( import cv2 as cv # type: ignore import numpy as np import requests + from requests import adapters + + session = requests.Session() + session.mount("https://", adapters.HTTPAdapter(max_retries=3)) ext = ext or ".jpeg" @@ -296,7 +318,7 @@ def image_normalize_func( src_url = src_obj_ref_rt_json["access_urls"]["read_url"] dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - response = requests.get(src_url) + response = session.get(src_url, timeout=30) bts = response.content nparr = np.frombuffer(bts, np.uint8) @@ -312,12 +334,13 @@ def image_normalize_func( ext = ext_mappings.get(ext, ext) content_type = "image/" + ext - requests.put( + session.put( url=dst_url, data=bts, headers={ "Content-Type": content_type, }, + timeout=30, ) return dst_obj_ref_rt @@ -336,6 +359,10 @@ def image_normalize_to_bytes_func( import cv2 as cv # type: ignore import numpy as np import requests + from requests import adapters + + session = requests.Session() + session.mount("https://", adapters.HTTPAdapter(max_retries=3)) ext = ext or ".jpeg" @@ -349,7 +376,7 @@ def image_normalize_to_bytes_func( src_obj_ref_rt_json = json.loads(src_obj_ref_rt) src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - response = requests.get(src_url) + response = session.get(src_url, timeout=30) bts = response.content nparr = np.frombuffer(bts, np.uint8) @@ -374,11 +401,15 @@ def pdf_extract_func(src_obj_ref_rt: str) -> str: from pypdf import PdfReader # type: ignore import requests + from requests import adapters + + session = requests.Session() + session.mount("https://", adapters.HTTPAdapter(max_retries=3)) src_obj_ref_rt_json = json.loads(src_obj_ref_rt) src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - response = requests.get(src_url, stream=True) + response = session.get(src_url, timeout=30, stream=True) response.raise_for_status() pdf_bytes = response.content @@ -403,11 +434,15 @@ def pdf_chunk_func(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> s from pypdf import PdfReader # type: ignore import requests + from requests import adapters + + session = requests.Session() + session.mount("https://", adapters.HTTPAdapter(max_retries=3)) src_obj_ref_rt_json = json.loads(src_obj_ref_rt) src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - response = requests.get(src_url, stream=True) + response = session.get(src_url, timeout=30, stream=True) response.raise_for_status() pdf_bytes = response.content From be5098202ff773638c2bf0b2afb4d73f52dc7f31 Mon Sep 17 00:00:00 2001 From: jialuoo Date: Thu, 6 Mar 2025 18:42:12 -0800 Subject: [PATCH 08/19] test: add unit tests for udf experiment options (#1468) --- tests/unit/_config/test_experiment_options.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit/_config/test_experiment_options.py b/tests/unit/_config/test_experiment_options.py index 8e612be06c..9735e494be 100644 --- a/tests/unit/_config/test_experiment_options.py +++ b/tests/unit/_config/test_experiment_options.py @@ -46,3 +46,18 @@ def test_blob_set_true_shows_warning(): options.blob = True assert options.blob is True + + 
+def test_udf_default_false(): + options = experiment_options.ExperimentOptions() + + assert options.udf is False + + +def test_udf_set_true_shows_warning(): + options = experiment_options.ExperimentOptions() + + with pytest.warns(bfe.PreviewWarning): + options.udf = True + + assert options.udf is True From 9a65e836394a52632dedd9489310c678537d0e37 Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Thu, 6 Mar 2025 19:39:31 -0800 Subject: [PATCH 09/19] chore: fix Multimodal Gemini modifies input DF (#1467) * chore: add experimental blob url tests * fix * chore: fix Multimodal Gemini modifies input DF * fix --- bigframes/core/convert.py | 4 ++-- tests/system/small/core/test_convert.py | 5 ++++- tests/unit/ml/test_golden_sql.py | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/bigframes/core/convert.py b/bigframes/core/convert.py index 94a0564556..1546c2f87e 100644 --- a/bigframes/core/convert.py +++ b/bigframes/core/convert.py @@ -54,7 +54,7 @@ def to_bf_series( bigframes.pandas.Series """ if isinstance(obj, series.Series): - return obj + return obj.copy() if session is None: session = global_session.get_global_session() @@ -118,7 +118,7 @@ def to_bf_dataframe( session: Optional[session.Session] = None, ) -> dataframe.DataFrame: if isinstance(obj, dataframe.DataFrame): - return obj + return obj.copy() if isinstance(obj, pd.DataFrame): if session is None: diff --git a/tests/system/small/core/test_convert.py b/tests/system/small/core/test_convert.py index 3f74d17091..7ce0dd47ba 100644 --- a/tests/system/small/core/test_convert.py +++ b/tests/system/small/core/test_convert.py @@ -56,4 +56,7 @@ def test_to_bf_dataframe(input, session): def test_to_bf_dataframe_with_bf_dataframe(session): bf = dataframe.DataFrame({"test": [1, 2, 3]}, session=session) - assert convert.to_bf_dataframe(bf, None, session) is bf + testing.assert_frame_equal( + convert.to_bf_dataframe(bf, None, session).to_pandas(), + bf.to_pandas(), + ) diff --git a/tests/unit/ml/test_golden_sql.py b/tests/unit/ml/test_golden_sql.py index 97d1d2d7d1..c9d147e18f 100644 --- a/tests/unit/ml/test_golden_sql.py +++ b/tests/unit/ml/test_golden_sql.py @@ -66,6 +66,7 @@ def mock_y(mock_session): mock_y._session = mock_session mock_y.columns = pd.Index(["input_column_label"]) mock_y.cache.return_value = mock_y + mock_y.copy.return_value = mock_y return mock_y @@ -98,6 +99,7 @@ def mock_X(mock_y, mock_session): ) mock_X.cache.return_value = mock_X + mock_X.copy.return_value = mock_X return mock_X From 27ab028cdc45296923b12446c77b344af4208a3a Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Fri, 7 Mar 2025 13:58:18 -0800 Subject: [PATCH 10/19] perf: Compilation no longer bounded by recursion (#1464) --- bigframes/core/bigframe_node.py | 13 + bigframes/core/compile/api.py | 13 +- bigframes/core/compile/compiler.py | 468 ++++++++++++++--------------- bigframes/session/executor.py | 2 +- 4 files changed, 252 insertions(+), 244 deletions(-) diff --git a/bigframes/core/bigframe_node.py b/bigframes/core/bigframe_node.py index 32c7f92912..369e8f6329 100644 --- a/bigframes/core/bigframe_node.py +++ b/bigframes/core/bigframe_node.py @@ -32,6 +32,8 @@ COLUMN_SET = frozenset[identifiers.ColumnId] +T = typing.TypeVar("T") + @dataclasses.dataclass(frozen=True) class Field: @@ -382,3 +384,14 @@ def bottom_up( results[node] = result return results[self] + + def reduce_up(self, reduction: Callable[[BigFrameNode, Tuple[T, ...]], T]) -> T: + """Apply a bottom-up reduction to the tree.""" + results: 
dict[BigFrameNode, T] = {} + for node in list(self.iter_nodes_topo()): + # child nodes have already been transformed + child_results = tuple(results[child] for child in node.child_nodes) + result = reduction(node, child_results) + results[node] = result + + return results[self] diff --git a/bigframes/core/compile/api.py b/bigframes/core/compile/api.py index cf441a2053..32257c0f98 100644 --- a/bigframes/core/compile/api.py +++ b/bigframes/core/compile/api.py @@ -25,13 +25,8 @@ import bigframes.core.ordering import bigframes.core.schema -_STRICT_COMPILER = compiler.Compiler(strict=True) - class SQLCompiler: - def __init__(self, strict: bool = True): - self._compiler = compiler.Compiler(strict=strict) - def compile( self, node: bigframes.core.nodes.BigFrameNode, @@ -41,7 +36,7 @@ def compile( ) -> str: """Compile node into sql where rows are sorted with ORDER BY.""" # If we are ordering the query anyways, compiling the slice as a limit is probably a good idea. - return self._compiler.compile_sql(node, ordered=ordered, limit=limit) + return compiler.compile_sql(node, ordered=ordered, limit=limit) def compile_raw( self, @@ -50,16 +45,16 @@ def compile_raw( str, Sequence[bigquery.SchemaField], bigframes.core.ordering.RowOrdering ]: """Compile node into sql that exposes all columns, including hidden ordering-only columns.""" - return self._compiler.compile_raw(node) + return compiler.compile_raw(node) def test_only_ibis_inferred_schema(node: bigframes.core.nodes.BigFrameNode): """Use only for testing paths to ensure ibis inferred schema does not diverge from bigframes inferred schema.""" import bigframes.core.schema - node = _STRICT_COMPILER._replace_unsupported_ops(node) + node = compiler._replace_unsupported_ops(node) node, _ = rewrite.pull_up_order(node, order_root=False) - ir = _STRICT_COMPILER.compile_node(node) + ir = compiler.compile_node(node) items = tuple( bigframes.core.schema.SchemaItem(name, ir.get_column_type(ibis_id)) for name, ibis_id in zip(node.schema.names, ir.column_ids) diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index f5be71830c..3d9bf19f76 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -13,7 +13,6 @@ # limitations under the License. from __future__ import annotations -import dataclasses import functools import io import typing @@ -42,249 +41,250 @@ import bigframes.session -@dataclasses.dataclass(frozen=True) -class Compiler: - # In strict mode, ordering will always be deterministic - # In unstrict mode, ordering from ReadTable or after joins may be ambiguous to improve query performance. - strict: bool = True - scalar_op_compiler = compile_scalar.ScalarOpCompiler() - - def compile_sql( - self, - node: nodes.BigFrameNode, - ordered: bool, - limit: typing.Optional[int] = None, - ) -> str: - # later steps might add ids, so snapshot before those steps. 
- output_ids = node.schema.names - if ordered: - # Need to do this before replacing unsupported ops, as that will rewrite slice ops - node, pulled_up_limit = rewrites.pullup_limit_from_slice(node) - if (pulled_up_limit is not None) and ( - (limit is None) or limit > pulled_up_limit - ): - limit = pulled_up_limit - - node = self._replace_unsupported_ops(node) - # prune before pulling up order to avoid unnnecessary row_number() ops - node = rewrites.column_pruning(node) - node, ordering = rewrites.pull_up_order(node, order_root=ordered) - # final pruning to cleanup up any leftovers unused values - node = rewrites.column_pruning(node) - return self.compile_node(node).to_sql( - order_by=ordering.all_ordering_columns if ordered else (), - limit=limit, - selections=output_ids, +def compile_sql( + node: nodes.BigFrameNode, + ordered: bool, + limit: typing.Optional[int] = None, +) -> str: + # later steps might add ids, so snapshot before those steps. + output_ids = node.schema.names + if ordered: + # Need to do this before replacing unsupported ops, as that will rewrite slice ops + node, pulled_up_limit = rewrites.pullup_limit_from_slice(node) + if (pulled_up_limit is not None) and ( + (limit is None) or limit > pulled_up_limit + ): + limit = pulled_up_limit + + node = _replace_unsupported_ops(node) + # prune before pulling up order to avoid unnnecessary row_number() ops + node = rewrites.column_pruning(node) + node, ordering = rewrites.pull_up_order(node, order_root=ordered) + # final pruning to cleanup up any leftovers unused values + node = rewrites.column_pruning(node) + return compile_node(node).to_sql( + order_by=ordering.all_ordering_columns if ordered else (), + limit=limit, + selections=output_ids, + ) + + +def compile_raw( + node: nodes.BigFrameNode, +) -> typing.Tuple[ + str, typing.Sequence[google.cloud.bigquery.SchemaField], bf_ordering.RowOrdering +]: + node = _replace_unsupported_ops(node) + node = rewrites.column_pruning(node) + node, ordering = rewrites.pull_up_order(node, order_root=True) + node = rewrites.column_pruning(node) + sql = compile_node(node).to_sql() + return sql, node.schema.to_bigquery(), ordering + + +def _replace_unsupported_ops(node: nodes.BigFrameNode): + # TODO: Run all replacement rules as single bottom-up pass + node = nodes.bottom_up(node, rewrites.rewrite_slice) + node = nodes.bottom_up(node, rewrites.rewrite_timedelta_expressions) + return node + + +# TODO: Remove cache when schema no longer requires compilation to derive schema (and therefor only compiles for execution) +@functools.lru_cache(maxsize=5000) +def compile_node(node: nodes.BigFrameNode) -> compiled.UnorderedIR: + """Compile node into CompileArrayValue. 
Caches result.""" + return node.reduce_up(lambda node, children: _compile_node(node, *children)) + + +@functools.singledispatch +def _compile_node( + node: nodes.BigFrameNode, *compiled_children: compiled.UnorderedIR +) -> compiled.UnorderedIR: + """Defines transformation but isn't cached, always use compile_node instead""" + raise ValueError(f"Can't compile unrecognized node: {node}") + + +@_compile_node.register +def compile_join( + node: nodes.JoinNode, left: compiled.UnorderedIR, right: compiled.UnorderedIR +): + condition_pairs = tuple( + (left.id.sql, right.id.sql) for left, right in node.conditions + ) + return left.join( + right=right, + type=node.type, + conditions=condition_pairs, + join_nulls=node.joins_nulls, + ) + + +@_compile_node.register +def compile_isin( + node: nodes.InNode, left: compiled.UnorderedIR, right: compiled.UnorderedIR +): + return left.isin_join( + right=right, + indicator_col=node.indicator_col.sql, + conditions=(node.left_col.id.sql, node.right_col.id.sql), + join_nulls=node.joins_nulls, + ) + + +@_compile_node.register +def compile_fromrange( + node: nodes.FromRangeNode, start: compiled.UnorderedIR, end: compiled.UnorderedIR +): + # Both start and end are single elements and do not inherently have an order) + start_table = start._to_ibis_expr() + end_table = end._to_ibis_expr() + + start_column = start_table.schema().names[0] + end_column = end_table.schema().names[0] + + # Perform a cross join to avoid errors + joined_table = start_table.cross_join(end_table) + + labels_array_table = ibis_api.range( + joined_table[start_column], joined_table[end_column] + node.step, node.step + ).name(node.output_id.sql) + labels = ( + typing.cast(ibis_types.ArrayValue, labels_array_table) + .as_table() + .unnest([node.output_id.sql]) + ) + return compiled.UnorderedIR( + labels, + columns=[labels[labels.columns[0]]], + ) + + +@_compile_node.register +def compile_readlocal(node: nodes.ReadLocalNode, *args): + array_as_pd = pd.read_feather( + io.BytesIO(node.feather_bytes), + columns=[item.source_id for item in node.scan_list.items], + ) + + # Convert timedeltas to microseconds for compatibility with BigQuery + _ = utils.replace_timedeltas_with_micros(array_as_pd) + + offsets = node.offsets_col.sql if node.offsets_col else None + return compiled.UnorderedIR.from_pandas( + array_as_pd, node.scan_list, offsets=offsets + ) + + +@_compile_node.register +def compile_readtable(node: nodes.ReadTableNode, *args): + ibis_table = _table_to_ibis( + node.source, scan_cols=[col.source_id for col in node.scan_list.items] + ) + + # TODO(b/395912450): Remove workaround solution once b/374784249 got resolved. 
+ for scan_item in node.scan_list.items: + if ( + scan_item.dtype == dtypes.JSON_DTYPE + and ibis_table[scan_item.source_id].type() == ibis_dtypes.string + ): + json_column = compile_scalar.parse_json( + ibis_table[scan_item.source_id] + ).name(scan_item.source_id) + ibis_table = ibis_table.mutate(json_column) + + return compiled.UnorderedIR( + ibis_table, + tuple( + ibis_table[scan_item.source_id].name(scan_item.id.sql) + for scan_item in node.scan_list.items + ), + ) + + +def _table_to_ibis( + source: nodes.BigqueryDataSource, + scan_cols: typing.Sequence[str], +) -> ibis_types.Table: + full_table_name = ( + f"{source.table.project_id}.{source.table.dataset_id}.{source.table.table_id}" + ) + # Physical schema might include unused columns, unsupported datatypes like JSON + physical_schema = ibis_bigquery.BigQuerySchema.to_ibis( + list(source.table.physical_schema) + ) + if source.at_time is not None or source.sql_predicate is not None: + import bigframes.session._io.bigquery + + sql = bigframes.session._io.bigquery.to_query( + full_table_name, + columns=scan_cols, + sql_predicate=source.sql_predicate, + time_travel_timestamp=source.at_time, ) + return ibis_bigquery.Backend().sql(schema=physical_schema, query=sql) + else: + return ibis_api.table(physical_schema, full_table_name).select(scan_cols) - def compile_raw( - self, - node: nodes.BigFrameNode, - ) -> typing.Tuple[ - str, typing.Sequence[google.cloud.bigquery.SchemaField], bf_ordering.RowOrdering - ]: - node = self._replace_unsupported_ops(node) - node = rewrites.column_pruning(node) - node, ordering = rewrites.pull_up_order(node, order_root=True) - node = rewrites.column_pruning(node) - sql = self.compile_node(node).to_sql() - return sql, node.schema.to_bigquery(), ordering - - def _replace_unsupported_ops(self, node: nodes.BigFrameNode): - # TODO: Run all replacement rules as single bottom-up pass - node = nodes.bottom_up(node, rewrites.rewrite_slice) - node = nodes.bottom_up(node, rewrites.rewrite_timedelta_expressions) - return node - - # TODO: Remove cache when schema no longer requires compilation to derive schema (and therefor only compiles for execution) - @functools.lru_cache(maxsize=5000) - def compile_node(self, node: nodes.BigFrameNode) -> compiled.UnorderedIR: - """Compile node into CompileArrayValue. 
Caches result.""" - return self._compile_node(node) - - @functools.singledispatchmethod - def _compile_node(self, node: nodes.BigFrameNode) -> compiled.UnorderedIR: - """Defines transformation but isn't cached, always use compile_node instead""" - raise ValueError(f"Can't compile unrecognized node: {node}") - - @_compile_node.register - def compile_join(self, node: nodes.JoinNode): - condition_pairs = tuple( - (left.id.sql, right.id.sql) for left, right in node.conditions - ) - left_unordered = self.compile_node(node.left_child) - right_unordered = self.compile_node(node.right_child) - return left_unordered.join( - right=right_unordered, - type=node.type, - conditions=condition_pairs, - join_nulls=node.joins_nulls, - ) +@_compile_node.register +def compile_filter(node: nodes.FilterNode, child: compiled.UnorderedIR): + return child.filter(node.predicate) - @_compile_node.register - def compile_isin(self, node: nodes.InNode): - left_unordered = self.compile_node(node.left_child) - right_unordered = self.compile_node(node.right_child) - return left_unordered.isin_join( - right=right_unordered, - indicator_col=node.indicator_col.sql, - conditions=(node.left_col.id.sql, node.right_col.id.sql), - join_nulls=node.joins_nulls, - ) - @_compile_node.register - def compile_fromrange(self, node: nodes.FromRangeNode): - # Both start and end are single elements and do not inherently have an order - start = self.compile_node(node.start) - end = self.compile_node(node.end) - start_table = start._to_ibis_expr() - end_table = end._to_ibis_expr() - - start_column = start_table.schema().names[0] - end_column = end_table.schema().names[0] - - # Perform a cross join to avoid errors - joined_table = start_table.cross_join(end_table) - - labels_array_table = ibis_api.range( - joined_table[start_column], joined_table[end_column] + node.step, node.step - ).name(node.output_id.sql) - labels = ( - typing.cast(ibis_types.ArrayValue, labels_array_table) - .as_table() - .unnest([node.output_id.sql]) - ) - return compiled.UnorderedIR( - labels, - columns=[labels[labels.columns[0]]], - ) +@_compile_node.register +def compile_selection(node: nodes.SelectionNode, child: compiled.UnorderedIR): + selection = tuple((ref, id.sql) for ref, id in node.input_output_pairs) + return child.selection(selection) - @_compile_node.register - def compile_readlocal(self, node: nodes.ReadLocalNode): - array_as_pd = pd.read_feather( - io.BytesIO(node.feather_bytes), - columns=[item.source_id for item in node.scan_list.items], - ) - # Convert timedeltas to microseconds for compatibility with BigQuery - _ = utils.replace_timedeltas_with_micros(array_as_pd) +@_compile_node.register +def compile_projection(node: nodes.ProjectionNode, child: compiled.UnorderedIR): + projections = ((expr, id.sql) for expr, id in node.assignments) + return child.projection(tuple(projections)) - offsets = node.offsets_col.sql if node.offsets_col else None - return compiled.UnorderedIR.from_pandas( - array_as_pd, node.scan_list, offsets=offsets - ) - @_compile_node.register - def compile_readtable(self, node: nodes.ReadTableNode): - return self.compile_read_table_unordered(node.source, node.scan_list) - - def read_table_as_unordered_ibis( - self, - source: nodes.BigqueryDataSource, - scan_cols: typing.Sequence[str], - ) -> ibis_types.Table: - full_table_name = f"{source.table.project_id}.{source.table.dataset_id}.{source.table.table_id}" - # Physical schema might include unused columns, unsupported datatypes like JSON - physical_schema = 
ibis_bigquery.BigQuerySchema.to_ibis( - list(source.table.physical_schema) - ) - if source.at_time is not None or source.sql_predicate is not None: - import bigframes.session._io.bigquery - - sql = bigframes.session._io.bigquery.to_query( - full_table_name, - columns=scan_cols, - sql_predicate=source.sql_predicate, - time_travel_timestamp=source.at_time, - ) - return ibis_bigquery.Backend().sql(schema=physical_schema, query=sql) - else: - return ibis_api.table(physical_schema, full_table_name).select(scan_cols) - - def compile_read_table_unordered( - self, source: nodes.BigqueryDataSource, scan: nodes.ScanList - ): - ibis_table = self.read_table_as_unordered_ibis( - source, scan_cols=[col.source_id for col in scan.items] - ) +@_compile_node.register +def compile_concat(node: nodes.ConcatNode, *children: compiled.UnorderedIR): + output_ids = [id.sql for id in node.output_ids] + return concat_impl.concat_unordered(children, output_ids) - # TODO(b/395912450): Remove workaround solution once b/374784249 got resolved. - for scan_item in scan.items: - if ( - scan_item.dtype == dtypes.JSON_DTYPE - and ibis_table[scan_item.source_id].type() == ibis_dtypes.string - ): - json_column = compile_scalar.parse_json( - ibis_table[scan_item.source_id] - ).name(scan_item.source_id) - ibis_table = ibis_table.mutate(json_column) - - return compiled.UnorderedIR( - ibis_table, - tuple( - ibis_table[scan_item.source_id].name(scan_item.id.sql) - for scan_item in scan.items - ), - ) - @_compile_node.register - def compile_filter(self, node: nodes.FilterNode): - return self.compile_node(node.child).filter(node.predicate) - - @_compile_node.register - def compile_selection(self, node: nodes.SelectionNode): - result = self.compile_node(node.child) - selection = tuple((ref, id.sql) for ref, id in node.input_output_pairs) - return result.selection(selection) - - @_compile_node.register - def compile_projection(self, node: nodes.ProjectionNode): - result = self.compile_node(node.child) - projections = ((expr, id.sql) for expr, id in node.assignments) - return result.projection(tuple(projections)) - - @_compile_node.register - def compile_concat(self, node: nodes.ConcatNode): - output_ids = [id.sql for id in node.output_ids] - compiled_unordered = [self.compile_node(node) for node in node.children] - return concat_impl.concat_unordered(compiled_unordered, output_ids) - - @_compile_node.register - def compile_rowcount(self, node: nodes.RowCountNode): - result = self.compile_node(node.child).row_count(name=node.col_id.sql) - return result - - @_compile_node.register - def compile_aggregate(self, node: nodes.AggregateNode): - aggs = tuple((agg, id.sql) for agg, id in node.aggregations) - result = self.compile_node(node.child).aggregate( - aggs, node.by_column_ids, order_by=node.order_by - ) - # TODO: Remove dropna field and use filter node instead - if node.dropna: - for key in node.by_column_ids: - if node.child.field_by_id[key.id].nullable: - result = result.filter(operations.notnull_op.as_expr(key)) - return result - - @_compile_node.register - def compile_window(self, node: nodes.WindowOpNode): - result = self.compile_node(node.child).project_window_op( - node.expression, - node.window_spec, - node.output_name.sql, - never_skip_nulls=node.never_skip_nulls, - ) - return result +@_compile_node.register +def compile_rowcount(node: nodes.RowCountNode, child: compiled.UnorderedIR): + return child.row_count(name=node.col_id.sql) + + +@_compile_node.register +def compile_aggregate(node: nodes.AggregateNode, child: 
compiled.UnorderedIR): + aggs = tuple((agg, id.sql) for agg, id in node.aggregations) + result = child.aggregate(aggs, node.by_column_ids, order_by=node.order_by) + # TODO: Remove dropna field and use filter node instead + if node.dropna: + for key in node.by_column_ids: + if node.child.field_by_id[key.id].nullable: + result = result.filter(operations.notnull_op.as_expr(key)) + return result + + +@_compile_node.register +def compile_window(node: nodes.WindowOpNode, child: compiled.UnorderedIR): + result = child.project_window_op( + node.expression, + node.window_spec, + node.output_name.sql, + never_skip_nulls=node.never_skip_nulls, + ) + return result + + +@_compile_node.register +def compile_explode(node: nodes.ExplodeNode, child: compiled.UnorderedIR): + offsets_col = node.offsets_col.sql if (node.offsets_col is not None) else None + return bigframes.core.compile.explode.explode_unordered( + child, node.column_ids, offsets_col + ) - @_compile_node.register - def compile_explode(self, node: nodes.ExplodeNode): - offsets_col = node.offsets_col.sql if (node.offsets_col is not None) else None - return bigframes.core.compile.explode.explode_unordered( - self.compile_node(node.child), node.column_ids, offsets_col - ) - @_compile_node.register - def compile_random_sample(self, node: nodes.RandomSampleNode): - return self.compile_node(node.child)._uniform_sampling(node.fraction) +@_compile_node.register +def compile_random_sample(node: nodes.RandomSampleNode, child: compiled.UnorderedIR): + return child._uniform_sampling(node.fraction) diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index 0644b0e6d9..e539525d80 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -204,7 +204,7 @@ def __init__( self.bqclient = bqclient self.storage_manager = storage_manager self.compiler: bigframes.core.compile.SQLCompiler = ( - bigframes.core.compile.SQLCompiler(strict=strictly_ordered) + bigframes.core.compile.SQLCompiler() ) self.strictly_ordered: bool = strictly_ordered self._cached_executions: weakref.WeakKeyDictionary[ From ff46f5a16891638484a5bca64442bb02bf8e11f8 Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Fri, 7 Mar 2025 15:01:20 -0800 Subject: [PATCH 11/19] chore: fix experimental blob docs (#1472) --- bigframes/operations/blob.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index 6541a14655..183003780b 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -44,7 +44,7 @@ def uri(self) -> bigframes.series.Series: BigFrames Blob is still under experiments. It may not work and subject to change in the future. Returns: - BigFrames Series: URIs as string.""" + bigframes.series.Series: URIs as string.""" s = bigframes.series.Series(self._block) return s.struct.field("uri") @@ -56,7 +56,7 @@ def authorizer(self) -> bigframes.series.Series: BigFrames Blob is still under experiments. It may not work and subject to change in the future. Returns: - BigFrames Series: Autorithers(connection) as string.""" + bigframes.series.Series: Autorithers(connection) as string.""" s = bigframes.series.Series(self._block) return s.struct.field("authorizer") @@ -68,7 +68,7 @@ def version(self) -> bigframes.series.Series: BigFrames Blob is still under experiments. It may not work and subject to change in the future. 
Returns: - BigFrames Series: Version as string.""" + bigframes.series.Series: Version as string.""" # version must be retrieved after fetching metadata return self._apply_unary_op(ops.obj_fetch_metadata_op).struct.field("version") @@ -79,7 +79,7 @@ def metadata(self) -> bigframes.series.Series: BigFrames Blob is still under experiments. It may not work and subject to change in the future. Returns: - BigFrames Series: JSON metadata of the Blob. Contains fields: content_type, md5_hash, size and updated(time).""" + bigframes.series.Series: JSON metadata of the Blob. Contains fields: content_type, md5_hash, size and updated(time).""" details_json = self._apply_unary_op(ops.obj_fetch_metadata_op).struct.field( "details" ) @@ -94,7 +94,7 @@ def content_type(self) -> bigframes.series.Series: BigFrames Blob is still under experiments. It may not work and subject to change in the future. Returns: - BigFrames Series: string of the content type.""" + bigframes.series.Series: string of the content type.""" return ( self.metadata() ._apply_unary_op(ops.JSONValue(json_path="$.content_type")) @@ -108,7 +108,7 @@ def md5_hash(self) -> bigframes.series.Series: BigFrames Blob is still under experiments. It may not work and subject to change in the future. Returns: - BigFrames Series: string of the md5 hash.""" + bigframes.series.Series: string of the md5 hash.""" return ( self.metadata() ._apply_unary_op(ops.JSONValue(json_path="$.md5_hash")) @@ -122,7 +122,7 @@ def size(self) -> bigframes.series.Series: BigFrames Blob is still under experiments. It may not work and subject to change in the future. Returns: - BigFrames Series: file size in bytes.""" + bigframes.series.Series: file size in bytes.""" return ( self.metadata() ._apply_unary_op(ops.JSONValue(json_path="$.size")) @@ -137,7 +137,7 @@ def updated(self) -> bigframes.series.Series: BigFrames Blob is still under experiments. It may not work and subject to change in the future. Returns: - BigFrames Series: updated time as UTC datetime.""" + bigframes.series.Series: updated time as UTC datetime.""" import bigframes.pandas as bpd updated = ( @@ -159,7 +159,7 @@ def _get_runtime( metadata (bool, default False): whether to fetch the metadata in the ObjectRefRuntime. Returns: - bigframes Series: ObjectRefRuntime JSON. + bigframes.series.Series: ObjectRefRuntime JSON. """ s = self._apply_unary_op(ops.obj_fetch_metadata_op) if with_metadata else self @@ -172,7 +172,7 @@ def read_url(self) -> bigframes.series.Series: BigFrames Blob is still under experiments. It may not work and subject to change in the future. Returns: - BigFrames Series: Read only URLs.""" + bigframes.series.Series: Read only URLs.""" return self._get_runtime(mode="R")._apply_unary_op( ops.JSONValue(json_path="$.access_urls.read_url") ) @@ -184,7 +184,7 @@ def write_url(self) -> bigframes.series.Series: BigFrames Blob is still under experiments. It may not work and subject to change in the future. Returns: - BigFrames Series: Writable URLs.""" + bigframes.series.Series: Writable URLs.""" return self._get_runtime(mode="RW")._apply_unary_op( ops.JSONValue(json_path="$.access_urls.write_url") ) @@ -303,7 +303,7 @@ def image_blur( container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. Returns: - BigFrames Blob Series + bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. 
""" import bigframes.blob._functions as blob_func @@ -390,7 +390,7 @@ def image_resize( container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. Returns: - BigFrames Blob Series + bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. """ dsize_set = dsize[0] > 0 and dsize[1] > 0 fsize_set = fx > 0.0 and fy > 0.0 @@ -486,7 +486,7 @@ def image_normalize( container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. Returns: - BigFrames Blob Series + bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. """ import bigframes.blob._functions as blob_func @@ -603,8 +603,7 @@ def pdf_chunk( arrays of strings. .. note:: - BigFrames Blob is still under experiments. It may not work and - subject to change in the future. + BigFrames Blob is still under experiments. It may not work and subject to change in the future. Args: connection (str or None, default None): BQ connection used for @@ -621,7 +620,7 @@ def pdf_chunk( container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. Returns: - bigframe.series.Series of array[str], where each string is a + bigframe.series.Series: Series of array[str], where each string is a chunk of text extracted from PDF. """ From 461e9e017d513376fc623a5ee47f8b9dd002b452 Mon Sep 17 00:00:00 2001 From: jialuoo Date: Fri, 7 Mar 2025 17:41:10 -0800 Subject: [PATCH 12/19] feat: support list output for managed function (#1457) * feat: support list output for managed function * add test decorator * resolve comments --- bigframes/dataframe.py | 6 +- bigframes/functions/_function_session.py | 5 + bigframes/operations/remote_function_ops.py | 21 ++- .../large/functions/test_managed_function.py | 160 ++++++++++++++++++ .../large/functions/test_remote_function.py | 18 +- .../small/functions/test_managed_function.py | 155 +++++++++++++++++ 6 files changed, 345 insertions(+), 20 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 151da51792..2349e469ab 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -4199,11 +4199,13 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): udf_input_dtypes = getattr(func, "input_dtypes") if len(udf_input_dtypes) != len(self.columns): raise ValueError( - f"Remote function takes {len(udf_input_dtypes)} arguments but DataFrame has {len(self.columns)} columns." + f"BigFrames BigQuery function takes {len(udf_input_dtypes)}" + f" arguments but DataFrame has {len(self.columns)} columns." ) if udf_input_dtypes != tuple(self.dtypes.to_list()): raise ValueError( - f"Remote function takes arguments of types {udf_input_dtypes} but DataFrame dtypes are {tuple(self.dtypes)}." + f"BigFrames BigQuery function takes arguments of types " + f"{udf_input_dtypes} but DataFrame dtypes are {tuple(self.dtypes)}." 
) series_list = [self[col] for col in self.columns] diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 15c8cb979e..0ae674b97d 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -892,6 +892,7 @@ def wrapper(func): func = cloudpickle.loads(cloudpickle.dumps(func)) self._try_delattr(func, "bigframes_bigquery_function") + self._try_delattr(func, "bigframes_bigquery_function_output_dtype") self._try_delattr(func, "input_dtypes") self._try_delattr(func, "output_dtype") self._try_delattr(func, "is_row_processor") @@ -951,6 +952,10 @@ def wrapper(func): ibis_signature.output_type ) ) + # Managed function directly supports certain output types which are + # not supported in remote function (e.g. list output). Thus no more + # processing for 'bigframes_bigquery_function_output_dtype'. + func.bigframes_bigquery_function_output_dtype = func.output_dtype func.is_row_processor = is_row_processor func.ibis_node = node diff --git a/bigframes/operations/remote_function_ops.py b/bigframes/operations/remote_function_ops.py index 8505fd1607..51cfccbc41 100644 --- a/bigframes/operations/remote_function_ops.py +++ b/bigframes/operations/remote_function_ops.py @@ -29,11 +29,12 @@ def expensive(self) -> bool: return True def output_type(self, *input_types): - # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method + # The output dtype should be set to a valid Dtype by @udf decorator, + # @remote_function decorator, or read_gbq_function method. if hasattr(self.func, "bigframes_bigquery_function_output_dtype"): return self.func.bigframes_bigquery_function_output_dtype - else: - raise AttributeError("bigframes_bigquery_function_output_dtype not defined") + + raise AttributeError("bigframes_bigquery_function_output_dtype not defined") @dataclasses.dataclass(frozen=True) @@ -46,11 +47,12 @@ def expensive(self) -> bool: return True def output_type(self, *input_types): - # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method + # The output dtype should be set to a valid Dtype by @udf decorator, + # @remote_function decorator, or read_gbq_function method. if hasattr(self.func, "bigframes_bigquery_function_output_dtype"): return self.func.bigframes_bigquery_function_output_dtype - else: - raise AttributeError("bigframes_bigquery_function_output_dtype not defined") + + raise AttributeError("bigframes_bigquery_function_output_dtype not defined") @dataclasses.dataclass(frozen=True) @@ -63,8 +65,9 @@ def expensive(self) -> bool: return True def output_type(self, *input_types): - # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method + # The output dtype should be set to a valid Dtype by @udf decorator, + # @remote_function decorator, or read_gbq_function method. 
if hasattr(self.func, "bigframes_bigquery_function_output_dtype"): return self.func.bigframes_bigquery_function_output_dtype - else: - raise AttributeError("bigframes_bigquery_function_output_dtype not defined") + + raise AttributeError("bigframes_bigquery_function_output_dtype not defined") diff --git a/tests/system/large/functions/test_managed_function.py b/tests/system/large/functions/test_managed_function.py index 4db7a1c47c..503720edcc 100644 --- a/tests/system/large/functions/test_managed_function.py +++ b/tests/system/large/functions/test_managed_function.py @@ -13,8 +13,10 @@ # limitations under the License. import pandas +import pyarrow import pytest +import bigframes from bigframes.functions import _function_session as bff_session from bigframes.functions._utils import get_python_version import bigframes.pandas as bpd @@ -164,3 +166,161 @@ def func(x, y): cleanup_function_assets( session.bqclient, session.cloudfunctionsclient, managed_func ) + + +@pytest.mark.parametrize( + "array_dtype", + [ + bool, + int, + float, + str, + ], +) +@pytest.mark.skipif( + get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, + reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", +) +def test_managed_function_array_output(session, scalars_dfs, dataset_id, array_dtype): + try: + + @session.udf(dataset=dataset_id) + def featurize(x: int) -> list[array_dtype]: # type: ignore + return [array_dtype(i) for i in [x, x + 1, x + 2]] + + scalars_df, scalars_pandas_df = scalars_dfs + + bf_int64_col = scalars_df["int64_too"] + bf_result = bf_int64_col.apply(featurize).to_pandas() + + pd_int64_col = scalars_pandas_df["int64_too"] + pd_result = pd_int64_col.apply(featurize) + + # Ignore any dtype disparity. + pandas.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + + finally: + # Clean up the gcp assets created for the managed function. + cleanup_function_assets( + featurize, session.bqclient, session.cloudfunctionsclient + ) + + +@pytest.mark.skipif( + get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, + reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", +) +def test_managed_function_binop_array_output(session, scalars_dfs, dataset_id): + try: + + def func(x, y): + return [len(x), abs(y % 4)] + + managed_func = session.udf( + input_types=[str, int], + output_type=list[int], + dataset=dataset_id, + )(func) + + scalars_df, scalars_pandas_df = scalars_dfs + + scalars_df = scalars_df.dropna() + scalars_pandas_df = scalars_pandas_df.dropna() + bf_result = ( + scalars_df["string_col"] + .combine(scalars_df["int64_col"], managed_func) + .to_pandas() + ) + pd_result = scalars_pandas_df["string_col"].combine( + scalars_pandas_df["int64_col"], func + ) + pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + finally: + # Clean up the gcp assets created for the managed function. + cleanup_function_assets( + managed_func, session.bqclient, session.cloudfunctionsclient + ) + + +@pytest.mark.skipif( + get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, + reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", +) +def test_manage_function_df_apply_axis_1_array_output(session): + bf_df = bigframes.dataframe.DataFrame( + { + "Id": [1, 2, 3], + "Age": [22.5, 23, 23.5], + "Name": ["alpha", "beta", "gamma"], + } + ) + + expected_dtypes = ( + bigframes.dtypes.INT_DTYPE, + bigframes.dtypes.FLOAT_DTYPE, + bigframes.dtypes.STRING_DTYPE, + ) + + # Assert the dataframe dtypes. 
+ assert tuple(bf_df.dtypes) == expected_dtypes + + try: + + @session.udf(input_types=[int, float, str], output_type=list[str]) + def foo(x, y, z): + return [str(x), str(y), z] + + assert getattr(foo, "is_row_processor") is False + assert getattr(foo, "input_dtypes") == expected_dtypes + assert getattr(foo, "output_dtype") == pandas.ArrowDtype( + pyarrow.list_( + bigframes.dtypes.bigframes_dtype_to_arrow_dtype( + bigframes.dtypes.STRING_DTYPE + ) + ) + ) + assert getattr(foo, "output_dtype") == getattr( + foo, "bigframes_bigquery_function_output_dtype" + ) + + # Fails to apply on dataframe with incompatible number of columns. + with pytest.raises( + ValueError, + match="^BigFrames BigQuery function takes 3 arguments but DataFrame has 2 columns\\.$", + ): + bf_df[["Id", "Age"]].apply(foo, axis=1) + + with pytest.raises( + ValueError, + match="^BigFrames BigQuery function takes 3 arguments but DataFrame has 4 columns\\.$", + ): + bf_df.assign(Country="lalaland").apply(foo, axis=1) + + # Fails to apply on dataframe with incompatible column datatypes. + with pytest.raises( + ValueError, + match="^BigFrames BigQuery function takes arguments of types .* but DataFrame dtypes are .*", + ): + bf_df.assign(Age=bf_df["Age"].astype("Int64")).apply(foo, axis=1) + + # Successfully applies to dataframe with matching number of columns. + # and their datatypes. + bf_result = bf_df.apply(foo, axis=1).to_pandas() + + # Since this scenario is not pandas-like, let's handcraft the + # expected result. + expected_result = pandas.Series( + [ + ["1", "22.5", "alpha"], + ["2", "23.0", "beta"], + ["3", "23.5", "gamma"], + ] + ) + + pandas.testing.assert_series_equal( + expected_result, bf_result, check_dtype=False, check_index_type=False + ) + + finally: + # Clean up the gcp assets created for the managed function. 
+ cleanup_function_assets(foo, session.bqclient, session.cloudfunctionsclient) diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 350eae3783..65bf20b966 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -2085,19 +2085,19 @@ def foo(x, y, z): # Fails to apply on dataframe with incompatible number of columns with pytest.raises( ValueError, - match="^Remote function takes 3 arguments but DataFrame has 2 columns\\.$", + match="^BigFrames BigQuery function takes 3 arguments but DataFrame has 2 columns\\.$", ): bf_df[["Id", "Age"]].apply(foo, axis=1) with pytest.raises( ValueError, - match="^Remote function takes 3 arguments but DataFrame has 4 columns\\.$", + match="^BigFrames BigQuery function takes 3 arguments but DataFrame has 4 columns\\.$", ): bf_df.assign(Country="lalaland").apply(foo, axis=1) # Fails to apply on dataframe with incompatible column datatypes with pytest.raises( ValueError, - match="^Remote function takes arguments of types .* but DataFrame dtypes are .*", + match="^BigFrames BigQuery function takes arguments of types .* but DataFrame dtypes are .*", ): bf_df.assign(Age=bf_df["Age"].astype("Int64")).apply(foo, axis=1) @@ -2171,19 +2171,19 @@ def foo(x, y, z): # Fails to apply on dataframe with incompatible number of columns with pytest.raises( ValueError, - match="^Remote function takes 3 arguments but DataFrame has 2 columns\\.$", + match="^BigFrames BigQuery function takes 3 arguments but DataFrame has 2 columns\\.$", ): bf_df[["Id", "Age"]].apply(foo, axis=1) with pytest.raises( ValueError, - match="^Remote function takes 3 arguments but DataFrame has 4 columns\\.$", + match="^BigFrames BigQuery function takes 3 arguments but DataFrame has 4 columns\\.$", ): bf_df.assign(Country="lalaland").apply(foo, axis=1) # Fails to apply on dataframe with incompatible column datatypes with pytest.raises( ValueError, - match="^Remote function takes arguments of types .* but DataFrame dtypes are .*", + match="^BigFrames BigQuery function takes arguments of types .* but DataFrame dtypes are .*", ): bf_df.assign(Age=bf_df["Age"].astype("Int64")).apply(foo, axis=1) @@ -2240,19 +2240,19 @@ def foo(x): # Fails to apply on dataframe with incompatible number of columns with pytest.raises( ValueError, - match="^Remote function takes 1 arguments but DataFrame has 0 columns\\.$", + match="^BigFrames BigQuery function takes 1 arguments but DataFrame has 0 columns\\.$", ): bf_df[[]].apply(foo, axis=1) with pytest.raises( ValueError, - match="^Remote function takes 1 arguments but DataFrame has 2 columns\\.$", + match="^BigFrames BigQuery function takes 1 arguments but DataFrame has 2 columns\\.$", ): bf_df.assign(Country="lalaland").apply(foo, axis=1) # Fails to apply on dataframe with incompatible column datatypes with pytest.raises( ValueError, - match="^Remote function takes arguments of types .* but DataFrame dtypes are .*", + match="^BigFrames BigQuery function takes arguments of types .* but DataFrame dtypes are .*", ): bf_df.assign(Id=bf_df["Id"].astype("Float64")).apply(foo, axis=1) diff --git a/tests/system/small/functions/test_managed_function.py b/tests/system/small/functions/test_managed_function.py index 41a5785d01..e1af68512a 100644 --- a/tests/system/small/functions/test_managed_function.py +++ b/tests/system/small/functions/test_managed_function.py @@ -62,6 +62,9 @@ def foo(x): assert hasattr(foo, "bigframes_bigquery_function") 
assert hasattr(foo, "ibis_node") + assert hasattr(foo, "input_dtypes") + assert hasattr(foo, "output_dtype") + assert hasattr(foo, "bigframes_bigquery_function_output_dtype") scalars_df, scalars_pandas_df = scalars_dfs @@ -124,6 +127,88 @@ def add(x: int, y: int) -> int: pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) +@pytest.mark.skipif( + get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, + reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", +) +@pytest.mark.parametrize( + ("typ",), + [ + pytest.param(int), + pytest.param(float), + pytest.param(bool), + pytest.param(str), + ], +) +def test_managed_function_series_apply_list_output( + typ, + scalars_dfs, + dataset_id_permanent, +): + def foo_list(x): + # The bytes() constructor expects a non-negative interger as its arg. + return [typ(abs(x)), typ(abs(x) + 1)] + + foo_list = udf( + input_types=int, + output_type=list[typ], # type: ignore + dataset=dataset_id_permanent, + name=get_function_name(foo_list), + )(foo_list) + + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result_col = scalars_df["int64_too"].apply(foo_list) + bf_result = ( + scalars_df["int64_too"].to_frame().assign(result=bf_result_col).to_pandas() + ) + + pd_result_col = scalars_pandas_df["int64_too"].apply(foo_list) + pd_result = scalars_pandas_df["int64_too"].to_frame().assign(result=pd_result_col) + + # Ignore any dtype difference. + assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + + +@pytest.mark.skipif( + get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, + reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", +) +def test_managed_function_series_combine_list_output(dataset_id_permanent, scalars_dfs): + def add_list(x: int, y: int) -> list[int]: + return [x, y] + + scalars_df, scalars_pandas_df = scalars_dfs + int_col_name_with_nulls = "int64_col" + int_col_name_no_nulls = "int64_too" + bf_df = scalars_df[[int_col_name_with_nulls, int_col_name_no_nulls]] + pd_df = scalars_pandas_df[[int_col_name_with_nulls, int_col_name_no_nulls]] + + # Make sure there are NA values in the test column. + assert any([pd.isna(val) for val in bf_df[int_col_name_with_nulls]]) + + add_list_managed_func = udf( + dataset=dataset_id_permanent, + name=get_function_name(add_list), + )(add_list) + + # After filtering out nulls the managed function application should work + # similar to pandas. + pd_filter = pd_df[int_col_name_with_nulls].notnull() + pd_result = pd_df[pd_filter][int_col_name_with_nulls].combine( + pd_df[pd_filter][int_col_name_no_nulls], add_list + ) + bf_filter = bf_df[int_col_name_with_nulls].notnull() + bf_result = ( + bf_df[bf_filter][int_col_name_with_nulls] + .combine(bf_df[bf_filter][int_col_name_no_nulls], add_list_managed_func) + .to_pandas() + ) + + # Ignore any dtype difference. 
+ pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + + @pytest.mark.skipif( get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", @@ -197,3 +282,73 @@ def add_ints(x, y): pd.testing.assert_series_equal( pd_result, bf_result, check_dtype=False, check_exact=True ) + + +@pytest.mark.skipif( + get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, + reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", +) +def test_managed_function_dataframe_map_list_output(scalars_dfs, dataset_id_permanent): + def add_one_list(x): + return [x + 1] * 3 + + mf_add_one_list = udf( + input_types=[int], + output_type=list[int], + dataset=dataset_id_permanent, + name=get_function_name(add_one_list), + )(add_one_list) + + scalars_df, scalars_pandas_df = scalars_dfs + int64_cols = ["int64_col", "int64_too"] + + bf_int64_df = scalars_df[int64_cols] + bf_int64_df_filtered = bf_int64_df.dropna() + bf_result = bf_int64_df_filtered.map(mf_add_one_list).to_pandas() + + pd_int64_df = scalars_pandas_df[int64_cols] + pd_int64_df_filtered = pd_int64_df.dropna() + pd_result = pd_int64_df_filtered.map(add_one_list) + + # Ignore any dtype difference. + assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + + +@pytest.mark.skipif( + get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, + reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", +) +def test_managed_function_dataframe_apply_axis_1_list_output( + session, scalars_dfs, dataset_id_permanent +): + scalars_df, scalars_pandas_df = scalars_dfs + series = scalars_df["int64_too"] + series_pandas = scalars_pandas_df["int64_too"] + + def add_ints_list(x, y): + return [x + y] * 2 + + add_ints_list_mf = session.udf( + input_types=[int, int], + output_type=list[int], + dataset=dataset_id_permanent, + name=get_function_name(add_ints_list, is_row_processor=True), + )(add_ints_list) + assert add_ints_list_mf.bigframes_bigquery_function # type: ignore + + with pytest.warns( + bigframes.exceptions.PreviewWarning, + match="axis=1 scenario is in preview.", + ): + bf_result = ( + bpd.DataFrame({"x": series, "y": series}) + .apply(add_ints_list_mf, axis=1) + .to_pandas() + ) + + pd_result = pd.DataFrame({"x": series_pandas, "y": series_pandas}).apply( + lambda row: add_ints_list(row["x"], row["y"]), axis=1 + ) + + # Ignore any dtype difference. 
+ pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) From 8bba8df66b45ae5e46924dd2aaa04f7e5539a4e5 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Mon, 10 Mar 2025 14:22:46 -0700 Subject: [PATCH 13/19] chore: Cleanup kokoro artifacts at end of build (#1462) --- .kokoro/build.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 58eaa7fedf..6cc03455da 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -50,3 +50,6 @@ if [[ -n "${NOX_SESSION:-}" ]]; then else python3 -m nox --stop-on-first-error fi + +# Prevent kokoro from trying to collect many mb of artifacts, wasting several minutes +sudo rm -rf "${KOKORO_ARTIFACTS_DIR?}"/* From 0ddee998ca7425047a12f21d2f544d9a034e19fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 10 Mar 2025 17:49:14 -0500 Subject: [PATCH 14/19] test: pin to older pandas-stubs (#1477) --- noxfile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index ca147e171d..b95e58f4ef 100644 --- a/noxfile.py +++ b/noxfile.py @@ -256,7 +256,8 @@ def mypy(session): set( [ "mypy", - "pandas-stubs", + # TODO: update to latest pandas-stubs once we resolve bigframes issues. + "pandas-stubs<=2.2.3.241126", "types-protobuf", "types-python-dateutil", "types-requests", From 9e471fbd1a3661300d988d8307013476029f4ee8 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Tue, 11 Mar 2025 10:17:46 -0700 Subject: [PATCH 15/19] test: target e2e tests to python 3.11 for max coverage (#1474) * test: target e2e tests to python 3.12 for max coverage * ensure large udf tests are run without skip * remove pytest.mark.skip from one more test * adjust the expect warnings in the ingress settings and service account * fix mypy * remove version 1.x check to surface 2.0 future warning --- bigframes/functions/_function_session.py | 10 +-- noxfile.py | 8 +- .../large/functions/test_managed_function.py | 12 --- .../large/functions/test_remote_function.py | 86 +++++++++++++------ 4 files changed, 68 insertions(+), 48 deletions(-) diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 0ae674b97d..a66f619cf9 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -47,7 +47,6 @@ ) from bigframes import clients -from bigframes import version as bigframes_version import bigframes.core.compile.ibis_types import bigframes.exceptions as bfe import bigframes.series as bf_series @@ -458,16 +457,13 @@ def remote_function( msg = bfe.format_message( "You have not explicitly set a user-managed `cloud_function_service_account`. " "Using the default Compute Engine service account. " - "To use Bigframes 2.0, please explicitly set `cloud_function_service_account` " + "In BigFrames 2.0 onwards, you would have to explicitly set `cloud_function_service_account` " 'either to a user-managed service account (preferred) or to `"default"` ' - "to use the Compute Engine service account (discouraged). " + "to use the default Compute Engine service account (discouraged). " "See, https://cloud.google.com/functions/docs/securing/function-identity." 
) - if ( - bigframes_version.__version__.startswith("1.") - and cloud_function_service_account is None - ): + if cloud_function_service_account is None: warnings.warn(msg, stacklevel=2, category=FutureWarning) if cloud_function_service_account == "default": diff --git a/noxfile.py b/noxfile.py index b95e58f4ef..a08ef27781 100644 --- a/noxfile.py +++ b/noxfile.py @@ -59,6 +59,12 @@ DEFAULT_PYTHON_VERSION = "3.10" +# Cloud Run Functions supports Python versions up to 3.12 +# https://cloud.google.com/run/docs/runtimes/python +# Managed Python UDF is supported only in Python 3.11 +# Let's set the E2E tests version to 3.11 to cover most code paths. +E2E_TEST_PYTHON_VERSION = "3.11" + UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] UNIT_TEST_STANDARD_DEPENDENCIES = [ "mock", @@ -418,7 +424,7 @@ def doctest(session: nox.sessions.Session): ) -@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS[-1]) +@nox.session(python=E2E_TEST_PYTHON_VERSION) def e2e(session: nox.sessions.Session): """Run the large tests in system test suite.""" run_system( diff --git a/tests/system/large/functions/test_managed_function.py b/tests/system/large/functions/test_managed_function.py index 503720edcc..efab338861 100644 --- a/tests/system/large/functions/test_managed_function.py +++ b/tests/system/large/functions/test_managed_function.py @@ -25,10 +25,6 @@ bpd.options.experiments.udf = True -@pytest.mark.skipif( - get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, - reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", -) def test_managed_function_multiply_with_ibis( session, scalars_table_id, @@ -80,10 +76,6 @@ def multiply(x, y): cleanup_function_assets(multiply, bigquery_client) -@pytest.mark.skipif( - get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, - reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", -) def test_managed_function_stringify_with_ibis( session, scalars_table_id, @@ -132,10 +124,6 @@ def stringify(x): ) -@pytest.mark.skipif( - get_python_version() not in bff_session._MANAGED_FUNC_PYTHON_VERSIONS, - reason=f"Supported version: {bff_session._MANAGED_FUNC_PYTHON_VERSIONS}", -) def test_managed_function_binop(session, scalars_dfs, dataset_id): try: diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 65bf20b966..0d7f888306 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -17,10 +17,11 @@ import inspect import math # must keep this at top level to test udf referring global import import os.path +import re import shutil -import sys import tempfile import textwrap +import typing import warnings import google.api_core.exceptions @@ -50,12 +51,6 @@ _team_euler = "Team Euler" -pytestmark = pytest.mark.skipif( - sys.version_info >= (3, 13), - reason="Runtime 'python313' is not supported yet. Skip for now.", -) - - def make_uniq_udf(udf): """Transform a udf to another with same behavior but a unique name. 
Use this to test remote functions with reuse=True, in which case parallel @@ -1323,14 +1318,38 @@ def square_num(x): ) -def test_remote_function_warns_default_cloud_function_service_account(): - project = "bigframes-dev-perf" - rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project)) - - with pytest.warns(FutureWarning, match="You have not explicitly set a"): - rf_session.remote_function( - cloud_function_service_account=None, # Explicitly omit service account. - ) +@pytest.mark.parametrize( + ("remote_function_args"), + [ + pytest.param( + {}, + id="no-set", + ), + pytest.param( + {"cloud_function_service_account": None}, + id="set-none", + ), + ], +) +def test_remote_function_warns_default_cloud_function_service_account( + session, remote_function_args +): + with pytest.warns(FutureWarning) as record: + session.remote_function(**remote_function_args) + + len( + [ + warn + for warn in record + if re.search( + ( + "You have not explicitly set a user-managed.*Using the default Compute Engine.*service account" + ), + typing.cast(FutureWarning, warn.message).args[0], + re.DOTALL, + ) + ] + ) == 1 @pytest.mark.flaky(retries=2, delay=120) @@ -2319,36 +2338,40 @@ def generate_stats(row: pandas.Series) -> list[int]: @pytest.mark.parametrize( - ("ingress_settings_args", "effective_ingress_settings", "expected_warning"), + ( + "ingress_settings_args", + "effective_ingress_settings", + "expect_default_ingress_setting_warning", + ), [ pytest.param( {}, functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, - FutureWarning, + True, id="no-set", ), pytest.param( {"cloud_function_ingress_settings": None}, functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, - FutureWarning, + True, id="set-none", ), pytest.param( {"cloud_function_ingress_settings": "all"}, functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, - None, + False, id="set-all", ), pytest.param( {"cloud_function_ingress_settings": "internal-only"}, functions_v2.ServiceConfig.IngressSettings.ALLOW_INTERNAL_ONLY, - None, + False, id="set-internal-only", ), pytest.param( {"cloud_function_ingress_settings": "internal-and-gclb"}, functions_v2.ServiceConfig.IngressSettings.ALLOW_INTERNAL_AND_GCLB, - None, + False, id="set-internal-and-gclb", ), ], @@ -2359,11 +2382,11 @@ def test_remote_function_ingress_settings( scalars_dfs, ingress_settings_args, effective_ingress_settings, - expected_warning, + expect_default_ingress_setting_warning, ): try: # Verify the function raises the expected security warning message. - with warnings.catch_warnings(record=True) as w: + with warnings.catch_warnings(record=True) as record: def square(x: int) -> int: return x * x @@ -2372,11 +2395,18 @@ def square(x: int) -> int: reuse=False, **ingress_settings_args )(square) - if expected_warning is not None: - assert issubclass(w[0].category, FutureWarning) - assert "Consider using 'internal-only' for enhanced security." 
in str( - w[0].message - ) + default_ingress_setting_warnings = [ + warn + for warn in record + if isinstance(warn.message, FutureWarning) + and "`cloud_function_ingress_settings` are set to 'all' by default" + in warn.message.args[0] + and "will change to 'internal-only' for enhanced security in future" + in warn.message.args[0] + ] + assert len(default_ingress_setting_warnings) == ( + 1 if expect_default_ingress_setting_warning else 0 + ) # Assert that the GCF is created with the intended maximum timeout gcf = session.cloudfunctionsclient.get_function( From 01dfe837740ba7119298cced6d9638af7326049b Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Tue, 11 Mar 2025 10:57:20 -0700 Subject: [PATCH 16/19] chore: fix experimental blob errors in preview non-exist files (#1479) --- bigframes/dataframe.py | 18 +++++++++--------- bigframes/operations/blob.py | 12 ++++++++++-- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 2349e469ab..262b23abd2 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -778,15 +778,15 @@ def _repr_html_(self) -> str: def obj_ref_rt_to_html(obj_ref_rt) -> str: obj_ref_rt_json = json.loads(obj_ref_rt) - gcs_metadata = obj_ref_rt_json["objectref"]["details"][ - "gcs_metadata" - ] - content_type = typing.cast( - str, gcs_metadata.get("content_type", "") - ) - if content_type.startswith("image"): - url = obj_ref_rt_json["access_urls"]["read_url"] - return f'' + obj_ref_details = obj_ref_rt_json["objectref"]["details"] + if "gcs_metadata" in obj_ref_details: + gcs_metadata = obj_ref_details["gcs_metadata"] + content_type = typing.cast( + str, gcs_metadata.get("content_type", "") + ) + if content_type.startswith("image"): + url = obj_ref_rt_json["access_urls"]["read_url"] + return f'' return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}' diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index 183003780b..88a58acbfa 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -18,6 +18,7 @@ from typing import cast, Optional, Union import IPython.display as ipy_display +import pandas as pd import requests from bigframes import clients @@ -209,8 +210,15 @@ def display(self, n: int = 3, *, content_type: str = ""): else: df["content_type"] = df["blob_col"].blob.content_type() - def display_single_url(read_url: str, content_type: str): - content_type = content_type.casefold() + def display_single_url( + read_url: str, content_type: Union[str, pd._libs.missing.NAType] + ): + if content_type is pd.NA: # display as raw data or error + response = requests.get(read_url) + ipy_display.display(response.content) + return + + content_type = cast(str, content_type).casefold() if content_type.startswith("image"): ipy_display.display(ipy_display.Image(url=read_url)) From e720f41ef643ac14ae94fa98de5ef4a3fd6dde93 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 11 Mar 2025 12:13:24 -0700 Subject: [PATCH 17/19] feat!: reading JSON data as a custom arrow extension type (#1458) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: use JSONArrowType for JSON data * fix related system tests * fixes lint and doctest * switch db_dtypes into 1.4.2 * fix tests * fix test_df_drop_duplicates_w_json * commit suggestion --------- Co-authored-by: Tim Sweña (Swast) Release-As: 1.40.0 --- bigframes/bigquery/_operations/json.py | 4 +- 
bigframes/core/array_value.py | 4 +- bigframes/core/compile/ibis_types.py | 2 +- bigframes/dtypes.py | 7 +- bigframes/session/_io/pandas.py | 3 - setup.py | 2 +- testing/constraints-3.9.txt | 2 +- tests/system/small/bigquery/test_json.py | 59 ++++++++-------- tests/system/small/test_dataframe.py | 13 +++- tests/system/small/test_dataframe_io.py | 85 +++++++++++------------- tests/system/small/test_series.py | 26 ++++---- tests/system/small/test_session.py | 25 ++++--- 12 files changed, 119 insertions(+), 113 deletions(-) diff --git a/bigframes/bigquery/_operations/json.py b/bigframes/bigquery/_operations/json.py index 0223811ebc..07efc5fa51 100644 --- a/bigframes/bigquery/_operations/json.py +++ b/bigframes/bigquery/_operations/json.py @@ -53,7 +53,7 @@ def json_set( >>> s = bpd.read_gbq("SELECT JSON '{\\\"a\\\": 1}' AS data")["data"] >>> bbq.json_set(s, json_path_value_pairs=[("$.a", 100), ("$.b", "hi")]) 0 {"a":100,"b":"hi"} - Name: data, dtype: dbjson + Name: data, dtype: extension>[pyarrow] Args: input (bigframes.series.Series): @@ -253,7 +253,7 @@ def parse_json( dtype: string >>> bbq.parse_json(s) 0 {"class":{"students":[{"id":5},{"id":12}]}} - dtype: dbjson + dtype: extension>[pyarrow] Args: input (bigframes.series.Series): diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py index 9c44255941..7ede7b7e65 100644 --- a/bigframes/core/array_value.py +++ b/bigframes/core/array_value.py @@ -108,8 +108,8 @@ def from_table( raise ValueError("must set at most one of 'offests', 'primary_key'") if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names): msg = bfe.format_message( - "Interpreting JSON column(s) as the `db_dtypes.dbjson` extension type is " - "in preview; this behavior may change in future versions." + "JSON column interpretation as a custom PyArrow extention in `db_dtypes` " + "is a preview feature and subject to change." ) warnings.warn(msg, bfe.PreviewWarning) # define data source only for needed columns, this makes row-hashing cheaper diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py index 54a5a37736..54b0a1408a 100644 --- a/bigframes/core/compile/ibis_types.py +++ b/bigframes/core/compile/ibis_types.py @@ -75,7 +75,7 @@ IBIS_GEO_TYPE, gpd.array.GeometryDtype(), ), - (ibis_dtypes.json, db_dtypes.JSONDtype()), + (ibis_dtypes.json, pd.ArrowDtype(db_dtypes.JSONArrowType())), ) BIGFRAMES_TO_IBIS: Dict[bigframes.dtypes.Dtype, ibis_dtypes.DataType] = { diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 5e9f1f108b..22cc521e8e 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -62,7 +62,9 @@ # No arrow equivalent GEO_DTYPE = gpd.array.GeometryDtype() # JSON -JSON_DTYPE = db_dtypes.JSONDtype() +# TODO: switch to pyarrow.json_(pyarrow.string()) when available. 
+JSON_ARROW_TYPE = db_dtypes.JSONArrowType() +JSON_DTYPE = pd.ArrowDtype(JSON_ARROW_TYPE) OBJ_REF_DTYPE = pd.ArrowDtype( pa.struct( ( @@ -80,7 +82,7 @@ ), pa.field( "details", - db_dtypes.JSONArrowType(), + JSON_ARROW_TYPE, ), ) ) @@ -301,7 +303,6 @@ def is_object_like(type_: Union[ExpressionType, str]) -> bool: return type_ in ("object", "O") or ( getattr(type_, "kind", None) == "O" and getattr(type_, "storage", None) != "pyarrow" - and getattr(type_, "name", None) != "dbjson" ) diff --git a/bigframes/session/_io/pandas.py b/bigframes/session/_io/pandas.py index a1549238b3..ca70ee774c 100644 --- a/bigframes/session/_io/pandas.py +++ b/bigframes/session/_io/pandas.py @@ -18,7 +18,6 @@ from typing import Collection, Union import bigframes_vendored.constants as constants -import db_dtypes # type: ignore import geopandas # type: ignore import numpy as np import pandas @@ -125,8 +124,6 @@ def arrow_to_pandas( ) elif isinstance(dtype, pandas.ArrowDtype): series = _arrow_to_pandas_arrowdtype(column, dtype) - elif isinstance(dtype, db_dtypes.JSONDtype): - series = db_dtypes.JSONArray(column) else: series = column.to_pandas(types_mapper=lambda _: dtype) diff --git a/setup.py b/setup.py index 9ea563b3cb..34e013c9a3 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ "ipywidgets >=7.7.1", "humanize >=4.6.0", "matplotlib >=3.7.1", - "db-dtypes >=1.4.0", + "db-dtypes >=1.4.2", # For vendored ibis-framework. "atpublic>=2.3,<6", "parsy>=2,<3", diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index b355e0915b..8c7c69efa7 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -25,7 +25,7 @@ tabulate==0.9 ipywidgets==7.7.1 humanize==4.6.0 matplotlib==3.7.1 -db-dtypes==1.4.0 +db-dtypes==1.4.2 # For vendored ibis-framework. atpublic==2.3 parsy==2.0 diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py index 492c0cf9b6..bade725733 100644 --- a/tests/system/small/bigquery/test_json.py +++ b/tests/system/small/bigquery/test_json.py @@ -12,30 +12,29 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import db_dtypes # type: ignore import geopandas as gpd # type: ignore import pandas as pd import pyarrow as pa import pytest import bigframes.bigquery as bbq -import bigframes.dtypes +import bigframes.dtypes as dtypes import bigframes.pandas as bpd @pytest.mark.parametrize( ("json_path", "expected_json"), [ - pytest.param("$.a", [{"a": 10}], id="simple"), - pytest.param("$.a.b.c", [{"a": {"b": {"c": 10, "d": []}}}], id="nested"), + pytest.param("$.a", ['{"a": 10}'], id="simple"), + pytest.param("$.a.b.c", ['{"a": {"b": {"c": 10, "d": []}}}'], id="nested"), ], ) def test_json_set_at_json_path(json_path, expected_json): - original_json = [{"a": {"b": {"c": "tester", "d": []}}}] - s = bpd.Series(original_json, dtype=db_dtypes.JSONDtype()) + original_json = ['{"a": {"b": {"c": "tester", "d": []}}}'] + s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE) actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)]) - expected = bpd.Series(expected_json, dtype=db_dtypes.JSONDtype()) + expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE) pd.testing.assert_series_equal( actual.to_pandas(), expected.to_pandas(), @@ -45,18 +44,20 @@ def test_json_set_at_json_path(json_path, expected_json): @pytest.mark.parametrize( ("json_value", "expected_json"), [ - pytest.param(10, [{"a": {"b": 10}}, {"a": {"b": 10}}], id="int"), - pytest.param(0.333, [{"a": {"b": 0.333}}, {"a": {"b": 0.333}}], id="float"), - pytest.param("eng", [{"a": {"b": "eng"}}, {"a": {"b": "eng"}}], id="string"), - pytest.param([1, 2], [{"a": {"b": 1}}, {"a": {"b": 2}}], id="series"), + pytest.param(10, ['{"a": {"b": 10}}', '{"a": {"b": 10}}'], id="int"), + pytest.param(0.333, ['{"a": {"b": 0.333}}', '{"a": {"b": 0.333}}'], id="float"), + pytest.param( + "eng", ['{"a": {"b": "eng"}}', '{"a": {"b": "eng"}}'], id="string" + ), + pytest.param([1, 2], ['{"a": {"b": 1}}', '{"a": {"b": 2}}'], id="series"), ], ) def test_json_set_at_json_value_type(json_value, expected_json): - original_json = [{"a": {"b": "dev"}}, {"a": {"b": [1, 2]}}] - s = bpd.Series(original_json, dtype=db_dtypes.JSONDtype()) + original_json = ['{"a": {"b": "dev"}}', '{"a": {"b": [1, 2]}}'] + s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE) actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)]) - expected = bpd.Series(expected_json, dtype=db_dtypes.JSONDtype()) + expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE) pd.testing.assert_series_equal( actual.to_pandas(), expected.to_pandas(), @@ -64,14 +65,14 @@ def test_json_set_at_json_value_type(json_value, expected_json): def test_json_set_w_more_pairs(): - original_json = [{"a": 2}, {"b": 5}, {"c": 1}] - s = bpd.Series(original_json, dtype=db_dtypes.JSONDtype()) + original_json = ['{"a": 2}', '{"b": 5}', '{"c": 1}'] + s = bpd.Series(original_json, dtype=dtypes.JSON_DTYPE) actual = bbq.json_set( s, json_path_value_pairs=[("$.a", 1), ("$.b", 2), ("$.a", [3, 4, 5])] ) - expected_json = [{"a": 3, "b": 2}, {"a": 4, "b": 2}, {"a": 5, "b": 2, "c": 1}] - expected = bpd.Series(expected_json, dtype=db_dtypes.JSONDtype()) + expected_json = ['{"a": 3, "b": 2}', '{"a": 4, "b": 2}', '{"a": 5, "b": 2, "c": 1}'] + expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE) pd.testing.assert_series_equal( actual.to_pandas(), expected.to_pandas(), @@ -79,13 +80,13 @@ def test_json_set_w_more_pairs(): def test_json_set_w_invalid_json_path_value_pairs(): - s = bpd.Series([{"a": 10}], dtype=db_dtypes.JSONDtype()) + s = bpd.Series(['{"a": 10}'], dtype=dtypes.JSON_DTYPE) with 
pytest.raises(ValueError): bbq.json_set(s, json_path_value_pairs=[("$.a", 1, 100)]) # type: ignore def test_json_set_w_invalid_value_type(): - s = bpd.Series([{"a": 10}], dtype=db_dtypes.JSONDtype()) + s = bpd.Series(['{"a": 10}'], dtype=dtypes.JSON_DTYPE) with pytest.raises(TypeError): bbq.json_set( s, @@ -101,17 +102,18 @@ def test_json_set_w_invalid_value_type(): def test_json_set_w_invalid_series_type(): + s = bpd.Series([1, 2]) with pytest.raises(TypeError): - bbq.json_set(bpd.Series([1, 2]), json_path_value_pairs=[("$.a", 1)]) + bbq.json_set(s, json_path_value_pairs=[("$.a", 1)]) def test_json_extract_from_json(): s = bpd.Series( - [{"a": {"b": [1, 2]}}, {"a": {"c": 1}}, {"a": {"b": 0}}], - dtype=db_dtypes.JSONDtype(), + ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'], + dtype=dtypes.JSON_DTYPE, ) actual = bbq.json_extract(s, "$.a.b").to_pandas() - expected = bpd.Series([[1, 2], None, 0], dtype=db_dtypes.JSONDtype()).to_pandas() + expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE).to_pandas() pd.testing.assert_series_equal( actual, expected, @@ -132,14 +134,15 @@ def test_json_extract_from_string(): def test_json_extract_w_invalid_series_type(): + s = bpd.Series([1, 2]) with pytest.raises(TypeError): - bbq.json_extract(bpd.Series([1, 2]), "$.a") + bbq.json_extract(s, "$.a") def test_json_extract_array_from_json(): s = bpd.Series( - [{"a": ["ab", "2", "3 xy"]}, {"a": []}, {"a": ["4", "5"]}, {}], - dtype=db_dtypes.JSONDtype(), + ['{"a": ["ab", "2", "3 xy"]}', '{"a": []}', '{"a": ["4", "5"]}', "{}"], + dtype=dtypes.JSON_DTYPE, ) actual = bbq.json_extract_array(s, "$.a") @@ -225,7 +228,7 @@ def test_json_extract_string_array_from_array_strings(): def test_json_extract_string_array_as_float_array_from_array_strings(): s = bpd.Series(["[1, 2.5, 3]", "[]", "[4,5]"]) - actual = bbq.json_extract_string_array(s, value_dtype=bigframes.dtypes.FLOAT_DTYPE) + actual = bbq.json_extract_string_array(s, value_dtype=dtypes.FLOAT_DTYPE) expected = bpd.Series([[1, 2.5, 3], [], [4, 5]]) pd.testing.assert_series_equal( actual.to_pandas(), diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index f80b811217..9415f9657e 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -30,6 +30,7 @@ import bigframes._config.display_options as display_options import bigframes.core.indexes as bf_indexes import bigframes.dataframe as dataframe +import bigframes.dtypes as dtypes import bigframes.pandas as bpd import bigframes.series as series from tests.system.utils import ( @@ -4584,7 +4585,17 @@ def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, sub ) def test_df_drop_duplicates_w_json(json_df, keep): bf_df = json_df.drop_duplicates(keep=keep).to_pandas() - pd_df = json_df.to_pandas().drop_duplicates(keep=keep) + + # drop_duplicates relies on pa.compute.dictionary_encode, which is incompatible + # with Arrow string extension types. Temporary conversion to standard Pandas + # strings is required. 
+ json_pandas_df = json_df.to_pandas() + json_pandas_df["json_col"] = json_pandas_df["json_col"].astype( + pd.StringDtype(storage="pyarrow") + ) + + pd_df = json_pandas_df.drop_duplicates(keep=keep) + pd_df["json_col"] = pd_df["json_col"].astype(dtypes.JSON_DTYPE) pd.testing.assert_frame_equal( pd_df, bf_df, diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index 4758c2d5b4..e80668939a 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import math from typing import Tuple import db_dtypes # type:ignore @@ -22,6 +21,7 @@ import pyarrow as pa import pytest +import bigframes.dtypes as dtypes from tests.system import utils try: @@ -35,7 +35,6 @@ from google.cloud import bigquery import bigframes -from bigframes import dtypes import bigframes.dataframe import bigframes.features import bigframes.pandas as bpd @@ -278,7 +277,7 @@ def test_to_arrow_override_global_option(scalars_df_index): assert scalars_df_index._query_job.destination.table_id == table_id -def test_load_json_w_unboxed_py_value(session): +def test_load_json_w_json_string_items(session): sql = """ SELECT 0 AS id, JSON_OBJECT('boolean', True) AS json_col, UNION ALL @@ -292,42 +291,43 @@ def test_load_json_w_unboxed_py_value(session): UNION ALL SELECT 5, JSON_OBJECT('null', null), UNION ALL + SELECT 6, JSON_OBJECT('b', 2, 'a', 1), + UNION ALL SELECT - 6, + 7, JSON_OBJECT( 'dict', JSON_OBJECT( 'int', 1, - 'array', [JSON_OBJECT('bar', 'hello'), JSON_OBJECT('foo', 1)] + 'array', [JSON_OBJECT('foo', 1), JSON_OBJECT('bar', 'hello')] ) ), """ df = session.read_gbq(sql, index_col="id") - assert df.dtypes["json_col"] == db_dtypes.JSONDtype() - assert isinstance(df["json_col"][0], dict) + assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType()) + + assert df["json_col"][0] == '{"boolean":true}' + assert df["json_col"][1] == '{"int":100}' + assert df["json_col"][2] == '{"float":0.98}' + assert df["json_col"][3] == '{"string":"hello world"}' + assert df["json_col"][4] == '{"array":[8,9,10]}' + assert df["json_col"][5] == '{"null":null}' - assert df["json_col"][0]["boolean"] - assert df["json_col"][1]["int"] == 100 - assert math.isclose(df["json_col"][2]["float"], 0.98) - assert df["json_col"][3]["string"] == "hello world" - assert df["json_col"][4]["array"] == [8, 9, 10] - assert df["json_col"][5]["null"] is None - assert df["json_col"][6]["dict"] == { - "int": 1, - "array": [{"bar": "hello"}, {"foo": 1}], - } + # Verifies JSON strings preserve array order, regardless of dictionary key order. + assert df["json_col"][6] == '{"a":1,"b":2}' + assert df["json_col"][7] == '{"dict":{"array":[{"foo":1},{"bar":"hello"}],"int":1}}' def test_load_json_to_pandas_has_correct_result(session): df = session.read_gbq("SELECT JSON_OBJECT('foo', 10, 'bar', TRUE) AS json_col") - assert df.dtypes["json_col"] == db_dtypes.JSONDtype() + assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType()) result = df.to_pandas() - # The order of keys within the JSON object shouldn't matter for equality checks. 
+ # These JSON strings are compatible with BigQuery's JSON storage, pd_df = pd.DataFrame( - {"json_col": [{"bar": True, "foo": 10}]}, - dtype=db_dtypes.JSONDtype(), + {"json_col": ['{"bar":true,"foo":10}']}, + dtype=pd.ArrowDtype(db_dtypes.JSONArrowType()), ) pd_df.index = pd_df.index.astype("Int64") pd.testing.assert_series_equal(result.dtypes, pd_df.dtypes) @@ -355,7 +355,7 @@ def test_load_json_in_struct(session): 'dict', JSON_OBJECT( 'int', 1, - 'array', [JSON_OBJECT('bar', 'hello'), JSON_OBJECT('foo', 1)] + 'array', [JSON_OBJECT('foo', 1), JSON_OBJECT('bar', 'hello')] ) ), 7), """ @@ -365,18 +365,15 @@ def test_load_json_in_struct(session): assert isinstance(df.dtypes["struct_col"].pyarrow_dtype, pa.StructType) data = df["struct_col"].struct.field("data") - assert data.dtype == db_dtypes.JSONDtype() + assert data.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType()) - assert data[0]["boolean"] - assert data[1]["int"] == 100 - assert math.isclose(data[2]["float"], 0.98) - assert data[3]["string"] == "hello world" - assert data[4]["array"] == [8, 9, 10] - assert data[5]["null"] is None - assert data[6]["dict"] == { - "int": 1, - "array": [{"bar": "hello"}, {"foo": 1}], - } + assert data[0] == '{"boolean":true}' + assert data[1] == '{"int":100}' + assert data[2] == '{"float":0.98}' + assert data[3] == '{"string":"hello world"}' + assert data[4] == '{"array":[8,9,10]}' + assert data[5] == '{"null":null}' + assert data[6] == '{"dict":{"array":[{"foo":1},{"bar":"hello"}],"int":1}}' def test_load_json_in_array(session): @@ -406,18 +403,15 @@ def test_load_json_in_array(session): data = df["array_col"].list assert data.len()[0] == 7 - assert data[0].dtype == db_dtypes.JSONDtype() + assert data[0].dtype == pd.ArrowDtype(db_dtypes.JSONArrowType()) - assert data[0][0]["boolean"] - assert data[1][0]["int"] == 100 - assert math.isclose(data[2][0]["float"], 0.98) - assert data[3][0]["string"] == "hello world" - assert data[4][0]["array"] == [8, 9, 10] - assert data[5][0]["null"] is None - assert data[6][0]["dict"] == { - "int": 1, - "array": [{"bar": "hello"}, {"foo": 1}], - } + assert data[0][0] == '{"boolean":true}' + assert data[1][0] == '{"int":100}' + assert data[2][0] == '{"float":0.98}' + assert data[3][0] == '{"string":"hello world"}' + assert data[4][0] == '{"array":[8,9,10]}' + assert data[5][0] == '{"null":null}' + assert data[6][0] == '{"dict":{"array":[{"bar":"hello"},{"foo":1}],"int":1}}' def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index): @@ -691,7 +685,8 @@ def test_to_gbq_w_json(bigquery_client): """Test the `to_gbq` API can get a JSON column.""" s1 = bpd.Series([1, 2, 3, 4]) s2 = bpd.Series( - ["a", 1, False, ["a", {"b": 1}], {"c": [1, 2, 3]}], dtype=db_dtypes.JSONDtype() + ['"a"', "1", "false", '["a", {"b": 1}]', '{"c": [1, 2, 3]}'], + dtype=dtypes.JSON_DTYPE, ) df = bpd.DataFrame({"id": s1, "json_col": s2}) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 980f2226b7..d62af962fc 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -26,6 +26,7 @@ import pytest import shapely # type: ignore +import bigframes.dtypes as dtypes import bigframes.features import bigframes.pandas import bigframes.series as series @@ -304,22 +305,21 @@ def test_series_construct_w_dtype_for_array_struct(): def test_series_construct_w_dtype_for_json(): data = [ - 1, - "str", - False, - ["a", {"b": 1}, None], + "1", + '"str"', + "false", + '["a", {"b": 1}, null]', None, - {"a": {"b": [1, 2, 3], "c": True}}, + '{"a": 
{"b": [1, 2, 3], "c": true}}', ] - s = bigframes.pandas.Series(data, dtype=db_dtypes.JSONDtype()) + s = bigframes.pandas.Series(data, dtype=dtypes.JSON_DTYPE) - assert s[0] == 1 - assert s[1] == "str" - assert s[2] is False - assert s[3][0] == "a" - assert s[3][1]["b"] == 1 + assert s[0] == "1" + assert s[1] == '"str"' + assert s[2] == "false" + assert s[3] == '["a",{"b":1},null]' assert pd.isna(s[4]) - assert s[5]["a"] == {"b": [1, 2, 3], "c": True} + assert s[5] == '{"a":{"b":[1,2,3],"c":true}}' def test_series_keys(scalars_dfs): @@ -383,7 +383,7 @@ def test_get_column(scalars_dfs, col_name, expected_dtype): def test_get_column_w_json(json_df, json_pandas_df): series = json_df["json_col"] series_pandas = series.to_pandas() - assert series.dtype == db_dtypes.JSONDtype() + assert series.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType()) assert series_pandas.shape[0] == json_pandas_df.shape[0] diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 0c8da52774..4b7495694b 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -22,7 +22,6 @@ import warnings import bigframes_vendored.pandas.io.gbq as vendored_pandas_gbq -import db_dtypes # type: ignore import google import google.cloud.bigquery as bigquery import numpy as np @@ -759,13 +758,13 @@ def test_read_pandas_timedelta_index(session, write_engine): ) def test_read_pandas_json_dataframes(session, write_engine): json_data = [ - 1, + "1", None, - ["1", "3", "5"], - {"a": 1, "b": ["x", "y"], "c": {"z": False, "x": []}}, + '["1","3","5"]', + '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}', ] expected_df = pd.DataFrame( - {"my_col": pd.Series(json_data, dtype=db_dtypes.JSONDtype())} + {"my_col": pd.Series(json_data, dtype=bigframes.dtypes.JSON_DTYPE)} ) actual_result = session.read_pandas( @@ -783,12 +782,12 @@ def test_read_pandas_json_dataframes(session, write_engine): ) def test_read_pandas_json_series(session, write_engine): json_data = [ - 1, + "1", None, - ["1", "3", "5"], - {"a": 1, "b": ["x", "y"], "c": {"z": False, "x": []}}, + '["1","3","5"]', + '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}', ] - expected_series = pd.Series(json_data, dtype=db_dtypes.JSONDtype()) + expected_series = pd.Series(json_data, dtype=bigframes.dtypes.JSON_DTYPE) actual_result = session.read_pandas( expected_series, write_engine=write_engine @@ -807,12 +806,12 @@ def test_read_pandas_json_series(session, write_engine): ) def test_read_pandas_json_index(session, write_engine): json_data = [ - 1, + "1", None, - ["1", "3", "5"], - {"a": 1, "b": ["x", "y"], "c": {"z": False, "x": []}}, + '["1","3","5"]', + '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}', ] - expected_index = pd.Index(json_data, dtype=db_dtypes.JSONDtype()) + expected_index: pd.Index = pd.Index(json_data, dtype=bigframes.dtypes.JSON_DTYPE) actual_result = session.read_pandas( expected_index, write_engine=write_engine ).to_pandas() From 0db248b5597a3966ac3dee1cca849509e48f4648 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Tue, 11 Mar 2025 14:24:34 -0700 Subject: [PATCH 18/19] fix: use `==` instead of `is` for timedelta type equality checks (#1480) * fix: use instead of for timedelta type equality checks * use int column for casting --- bigframes/core/rewrite/timedeltas.py | 30 +++++++++---------- bigframes/operations/aggregations.py | 8 ++--- bigframes/operations/numeric_ops.py | 22 +++++++------- bigframes/operations/timedelta_ops.py | 8 ++--- .../small/operations/test_timedeltas.py | 29 ++++++++++++------ 5 files changed, 
54 insertions(+), 43 deletions(-) diff --git a/bigframes/core/rewrite/timedeltas.py b/bigframes/core/rewrite/timedeltas.py index bf3c0ee639..ea8e608a84 100644 --- a/bigframes/core/rewrite/timedeltas.py +++ b/bigframes/core/rewrite/timedeltas.py @@ -111,7 +111,7 @@ def _rewrite_expressions(expr: ex.Expression, schema: schema.ArraySchema) -> _Ty def _rewrite_scalar_constant_expr(expr: ex.ScalarConstantExpression) -> _TypedExpr: - if expr.dtype is dtypes.TIMEDELTA_DTYPE: + if expr.dtype == dtypes.TIMEDELTA_DTYPE: int_repr = utils.timedelta_to_micros(expr.value) # type: ignore return _TypedExpr(ex.const(int_repr, expr.dtype), expr.dtype) @@ -148,31 +148,31 @@ def _rewrite_sub_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: if dtypes.is_datetime_like(left.dtype) and dtypes.is_datetime_like(right.dtype): return _TypedExpr.create_op_expr(ops.timestamp_diff_op, left, right) - if dtypes.is_datetime_like(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE: + if dtypes.is_datetime_like(left.dtype) and right.dtype == dtypes.TIMEDELTA_DTYPE: return _TypedExpr.create_op_expr(ops.timestamp_sub_op, left, right) if left.dtype == dtypes.DATE_DTYPE and right.dtype == dtypes.DATE_DTYPE: return _TypedExpr.create_op_expr(ops.date_diff_op, left, right) - if left.dtype == dtypes.DATE_DTYPE and right.dtype is dtypes.TIMEDELTA_DTYPE: + if left.dtype == dtypes.DATE_DTYPE and right.dtype == dtypes.TIMEDELTA_DTYPE: return _TypedExpr.create_op_expr(ops.date_sub_op, left, right) return _TypedExpr.create_op_expr(ops.sub_op, left, right) def _rewrite_add_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: - if dtypes.is_datetime_like(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE: + if dtypes.is_datetime_like(left.dtype) and right.dtype == dtypes.TIMEDELTA_DTYPE: return _TypedExpr.create_op_expr(ops.timestamp_add_op, left, right) - if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like(right.dtype): + if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like(right.dtype): # Re-arrange operands such that timestamp is always on the left and timedelta is # always on the right. return _TypedExpr.create_op_expr(ops.timestamp_add_op, right, left) - if left.dtype == dtypes.DATE_DTYPE and right.dtype is dtypes.TIMEDELTA_DTYPE: + if left.dtype == dtypes.DATE_DTYPE and right.dtype == dtypes.TIMEDELTA_DTYPE: return _TypedExpr.create_op_expr(ops.date_add_op, left, right) - if left.dtype is dtypes.TIMEDELTA_DTYPE and right.dtype == dtypes.DATE_DTYPE: + if left.dtype == dtypes.TIMEDELTA_DTYPE and right.dtype == dtypes.DATE_DTYPE: # Re-arrange operands such that date is always on the left and timedelta is # always on the right. 
return _TypedExpr.create_op_expr(ops.date_add_op, right, left) @@ -183,9 +183,9 @@ def _rewrite_add_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: def _rewrite_mul_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: result = _TypedExpr.create_op_expr(ops.mul_op, left, right) - if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): + if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result) - if dtypes.is_numeric(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE: + if dtypes.is_numeric(left.dtype) and right.dtype == dtypes.TIMEDELTA_DTYPE: return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result) return result @@ -194,7 +194,7 @@ def _rewrite_mul_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: def _rewrite_div_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: result = _TypedExpr.create_op_expr(ops.div_op, left, right) - if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): + if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result) return result @@ -203,14 +203,14 @@ def _rewrite_div_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: def _rewrite_floordiv_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr: result = _TypedExpr.create_op_expr(ops.floordiv_op, left, right) - if left.dtype is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): + if left.dtype == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right.dtype): return _TypedExpr.create_op_expr(ops.timedelta_floor_op, result) return result def _rewrite_to_timedelta_op(op: ops.ToTimedeltaOp, arg: _TypedExpr): - if arg.dtype is dtypes.TIMEDELTA_DTYPE: + if arg.dtype == dtypes.TIMEDELTA_DTYPE: # Do nothing for values that are already timedeltas return arg @@ -239,19 +239,19 @@ def _rewrite_aggregation( aggs.DateSeriesDiffOp(aggregation.op.periods), aggregation.arg ) - if isinstance(aggregation.op, aggs.StdOp) and input_type is dtypes.TIMEDELTA_DTYPE: + if isinstance(aggregation.op, aggs.StdOp) and input_type == dtypes.TIMEDELTA_DTYPE: return ex.UnaryAggregation( aggs.StdOp(should_floor_result=True), aggregation.arg ) - if isinstance(aggregation.op, aggs.MeanOp) and input_type is dtypes.TIMEDELTA_DTYPE: + if isinstance(aggregation.op, aggs.MeanOp) and input_type == dtypes.TIMEDELTA_DTYPE: return ex.UnaryAggregation( aggs.MeanOp(should_floor_result=True), aggregation.arg ) if ( isinstance(aggregation.op, aggs.QuantileOp) - and input_type is dtypes.TIMEDELTA_DTYPE + and input_type == dtypes.TIMEDELTA_DTYPE ): return ex.UnaryAggregation( aggs.QuantileOp(q=aggregation.op.q, should_floor_result=True), diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index a714f5804c..0ae4516dfd 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -142,7 +142,7 @@ class SumOp(UnaryAggregateOp): name: ClassVar[str] = "sum" def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - if input_types[0] is dtypes.TIMEDELTA_DTYPE: + if input_types[0] == dtypes.TIMEDELTA_DTYPE: return dtypes.TIMEDELTA_DTYPE if dtypes.is_numeric(input_types[0]): @@ -185,7 +185,7 @@ def order_independent(self) -> bool: return True def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - if input_types[0] is dtypes.TIMEDELTA_DTYPE: + if input_types[0] == dtypes.TIMEDELTA_DTYPE: return 
dtypes.TIMEDELTA_DTYPE return signatures.UNARY_REAL_NUMERIC.output_type(input_types[0]) @@ -233,7 +233,7 @@ class MeanOp(UnaryAggregateOp): should_floor_result: bool = False def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - if input_types[0] is dtypes.TIMEDELTA_DTYPE: + if input_types[0] == dtypes.TIMEDELTA_DTYPE: return dtypes.TIMEDELTA_DTYPE return signatures.UNARY_REAL_NUMERIC.output_type(input_types[0]) @@ -275,7 +275,7 @@ class StdOp(UnaryAggregateOp): should_floor_result: bool = False def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - if input_types[0] is dtypes.TIMEDELTA_DTYPE: + if input_types[0] == dtypes.TIMEDELTA_DTYPE: return dtypes.TIMEDELTA_DTYPE return signatures.FixedOutputType( diff --git a/bigframes/operations/numeric_ops.py b/bigframes/operations/numeric_ops.py index ae23aff707..d06d6eb336 100644 --- a/bigframes/operations/numeric_ops.py +++ b/bigframes/operations/numeric_ops.py @@ -124,9 +124,9 @@ def output_type(self, *input_types): return input_types[0] # Temporal addition. - if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE: + if dtypes.is_datetime_like(left_type) and right_type == dtypes.TIMEDELTA_DTYPE: return left_type - if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like(right_type): + if left_type == dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like(right_type): return right_type if left_type == dtypes.DATE_DTYPE and right_type == dtypes.TIMEDELTA_DTYPE: @@ -135,7 +135,7 @@ def output_type(self, *input_types): if left_type == dtypes.TIMEDELTA_DTYPE and right_type == dtypes.DATE_DTYPE: return dtypes.DATETIME_DTYPE - if left_type is dtypes.TIMEDELTA_DTYPE and right_type is dtypes.TIMEDELTA_DTYPE: + if left_type == dtypes.TIMEDELTA_DTYPE and right_type == dtypes.TIMEDELTA_DTYPE: return dtypes.TIMEDELTA_DTYPE if (left_type is None or dtypes.is_numeric(left_type)) and ( @@ -164,13 +164,13 @@ def output_type(self, *input_types): if left_type == dtypes.DATE_DTYPE and right_type == dtypes.DATE_DTYPE: return dtypes.TIMEDELTA_DTYPE - if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE: + if dtypes.is_datetime_like(left_type) and right_type == dtypes.TIMEDELTA_DTYPE: return left_type if left_type == dtypes.DATE_DTYPE and right_type == dtypes.TIMEDELTA_DTYPE: return dtypes.DATETIME_DTYPE - if left_type is dtypes.TIMEDELTA_DTYPE and right_type is dtypes.TIMEDELTA_DTYPE: + if left_type == dtypes.TIMEDELTA_DTYPE and right_type == dtypes.TIMEDELTA_DTYPE: return dtypes.TIMEDELTA_DTYPE if (left_type is None or dtypes.is_numeric(left_type)) and ( @@ -193,9 +193,9 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT left_type = input_types[0] right_type = input_types[1] - if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right_type): + if left_type == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right_type): return dtypes.TIMEDELTA_DTYPE - if dtypes.is_numeric(left_type) and right_type is dtypes.TIMEDELTA_DTYPE: + if dtypes.is_numeric(left_type) and right_type == dtypes.TIMEDELTA_DTYPE: return dtypes.TIMEDELTA_DTYPE if (left_type is None or dtypes.is_numeric(left_type)) and ( @@ -217,10 +217,10 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT left_type = input_types[0] right_type = input_types[1] - if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right_type): + if left_type == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right_type): return 
dtypes.TIMEDELTA_DTYPE - if left_type is dtypes.TIMEDELTA_DTYPE and right_type is dtypes.TIMEDELTA_DTYPE: + if left_type == dtypes.TIMEDELTA_DTYPE and right_type == dtypes.TIMEDELTA_DTYPE: return dtypes.FLOAT_DTYPE if (left_type is None or dtypes.is_numeric(left_type)) and ( @@ -244,10 +244,10 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT left_type = input_types[0] right_type = input_types[1] - if left_type is dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right_type): + if left_type == dtypes.TIMEDELTA_DTYPE and dtypes.is_numeric(right_type): return dtypes.TIMEDELTA_DTYPE - if left_type is dtypes.TIMEDELTA_DTYPE and right_type is dtypes.TIMEDELTA_DTYPE: + if left_type == dtypes.TIMEDELTA_DTYPE and right_type == dtypes.TIMEDELTA_DTYPE: return dtypes.INT_DTYPE if (left_type is None or dtypes.is_numeric(left_type)) and ( diff --git a/bigframes/operations/timedelta_ops.py b/bigframes/operations/timedelta_ops.py index b831e3f864..5e9a1189e4 100644 --- a/bigframes/operations/timedelta_ops.py +++ b/bigframes/operations/timedelta_ops.py @@ -46,7 +46,7 @@ class TimedeltaFloorOp(base_ops.UnaryOp): def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: input_type = input_types[0] - if dtypes.is_numeric(input_type) or input_type is dtypes.TIMEDELTA_DTYPE: + if dtypes.is_numeric(input_type) or input_type == dtypes.TIMEDELTA_DTYPE: return dtypes.TIMEDELTA_DTYPE raise TypeError(f"unsupported type: {input_type}") @@ -62,11 +62,11 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT # timestamp + timedelta => timestamp if ( dtypes.is_datetime_like(input_types[0]) - and input_types[1] is dtypes.TIMEDELTA_DTYPE + and input_types[1] == dtypes.TIMEDELTA_DTYPE ): return input_types[0] # timedelta + timestamp => timestamp - if input_types[0] is dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like( + if input_types[0] == dtypes.TIMEDELTA_DTYPE and dtypes.is_datetime_like( input_types[1] ): return input_types[1] @@ -87,7 +87,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT # timestamp - timedelta => timestamp if ( dtypes.is_datetime_like(input_types[0]) - and input_types[1] is dtypes.TIMEDELTA_DTYPE + and input_types[1] == dtypes.TIMEDELTA_DTYPE ): return input_types[0] diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index 53cb5f7419..0cf394e454 100644 --- a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -58,7 +58,8 @@ def temporal_dfs(session): pd.Timedelta(-4, "m"), pd.Timedelta(6, "h"), ], - "numeric_col": [1.5, 2, -3], + "float_col": [1.5, 2, -3], + "int_col": [1, 2, -3], } ) @@ -92,10 +93,10 @@ def _assert_series_equal(actual: pd.Series, expected: pd.Series): (operator.sub, "timedelta_col_1", "timedelta_col_2"), (operator.truediv, "timedelta_col_1", "timedelta_col_2"), (operator.floordiv, "timedelta_col_1", "timedelta_col_2"), - (operator.truediv, "timedelta_col_1", "numeric_col"), - (operator.floordiv, "timedelta_col_1", "numeric_col"), - (operator.mul, "timedelta_col_1", "numeric_col"), - (operator.mul, "numeric_col", "timedelta_col_1"), + (operator.truediv, "timedelta_col_1", "float_col"), + (operator.floordiv, "timedelta_col_1", "float_col"), + (operator.mul, "timedelta_col_1", "float_col"), + (operator.mul, "float_col", "timedelta_col_1"), ], ) def test_timedelta_binary_ops_between_series(temporal_dfs, op, col_1, col_2): @@ -117,7 +118,7 @@ def 
test_timedelta_binary_ops_between_series(temporal_dfs, op, col_1, col_2): (operator.truediv, "timedelta_col_1", 3), (operator.floordiv, "timedelta_col_1", 3), (operator.mul, "timedelta_col_1", 3), - (operator.mul, "numeric_col", pd.Timedelta(1, "s")), + (operator.mul, "float_col", pd.Timedelta(1, "s")), ], ) def test_timedelta_binary_ops_series_and_literal(temporal_dfs, op, col, literal): @@ -136,10 +137,10 @@ def test_timedelta_binary_ops_series_and_literal(temporal_dfs, op, col, literal) (operator.sub, "timedelta_col_1", pd.Timedelta(2, "s")), (operator.truediv, "timedelta_col_1", pd.Timedelta(2, "s")), (operator.floordiv, "timedelta_col_1", pd.Timedelta(2, "s")), - (operator.truediv, "numeric_col", pd.Timedelta(2, "s")), - (operator.floordiv, "numeric_col", pd.Timedelta(2, "s")), + (operator.truediv, "float_col", pd.Timedelta(2, "s")), + (operator.floordiv, "float_col", pd.Timedelta(2, "s")), (operator.mul, "timedelta_col_1", 3), - (operator.mul, "numeric_col", pd.Timedelta(1, "s")), + (operator.mul, "float_col", pd.Timedelta(1, "s")), ], ) def test_timedelta_binary_ops_literal_and_series(temporal_dfs, op, col, literal): @@ -181,6 +182,16 @@ def test_timestamp_add__ts_series_plus_td_series(temporal_dfs, column, pd_dtype) ) +@pytest.mark.parametrize("column", ["datetime_col", "timestamp_col"]) +def test_timestamp_add__ts_series_plus_td_series__explicit_cast(temporal_dfs, column): + bf_df, _ = temporal_dfs + dtype = pd.ArrowDtype(pa.duration("us")) + + actual_result = bf_df[column] + bf_df["int_col"].astype(dtype) + + assert len(actual_result) > 0 + + @pytest.mark.parametrize( "literal", [ From 5273d36343ccab30ec7fbefdd88121ef8c986df7 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 11 Mar 2025 16:14:44 -0700 Subject: [PATCH 19/19] chore(main): release 1.40.0 (#1466) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 24 +++++++++++++++++++++++ bigframes/version.py | 4 ++-- third_party/bigframes_vendored/version.py | 4 ++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9617d97c58..78ecfa53d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,30 @@ [1]: https://pypi.org/project/bigframes/#history +## [1.40.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.39.0...v1.40.0) (2025-03-11) + + +### ⚠ BREAKING CHANGES + +* reading JSON data as a custom arrow extension type ([#1458](https://github.com/googleapis/python-bigquery-dataframes/issues/1458)) + +### Features + +* Reading JSON data as a custom arrow extension type ([#1458](https://github.com/googleapis/python-bigquery-dataframes/issues/1458)) ([e720f41](https://github.com/googleapis/python-bigquery-dataframes/commit/e720f41ef643ac14ae94fa98de5ef4a3fd6dde93)) +* Support list output for managed function ([#1457](https://github.com/googleapis/python-bigquery-dataframes/issues/1457)) ([461e9e0](https://github.com/googleapis/python-bigquery-dataframes/commit/461e9e017d513376fc623a5ee47f8b9dd002b452)) + + +### Bug Fixes + +* Fix list-like indexers in partial ordering mode ([#1456](https://github.com/googleapis/python-bigquery-dataframes/issues/1456)) ([fe72ada](https://github.com/googleapis/python-bigquery-dataframes/commit/fe72ada9cebb32947560c97567d7937c8b618f0d)) +* Fix the merge issue between 1424 and 1373 ([#1461](https://github.com/googleapis/python-bigquery-dataframes/issues/1461)) 
([7b6e361](https://github.com/googleapis/python-bigquery-dataframes/commit/7b6e3615f8d4531beb4b59ca1223927112e713da)) +* Use `==` instead of `is` for timedelta type equality checks ([#1480](https://github.com/googleapis/python-bigquery-dataframes/issues/1480)) ([0db248b](https://github.com/googleapis/python-bigquery-dataframes/commit/0db248b5597a3966ac3dee1cca849509e48f4648)) + + +### Performance Improvements + +* Compilation no longer bounded by recursion ([#1464](https://github.com/googleapis/python-bigquery-dataframes/issues/1464)) ([27ab028](https://github.com/googleapis/python-bigquery-dataframes/commit/27ab028cdc45296923b12446c77b344af4208a3a)) + ## [1.39.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.38.0...v1.39.0) (2025-03-05) diff --git a/bigframes/version.py b/bigframes/version.py index f743c7e94d..e4062aa0c6 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.39.0" +__version__ = "1.40.0" # {x-release-please-start-date} -__release_date__ = "2025-03-05" +__release_date__ = "2025-03-11" # {x-release-please-end} diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py index f743c7e94d..e4062aa0c6 100644 --- a/third_party/bigframes_vendored/version.py +++ b/third_party/bigframes_vendored/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.39.0" +__version__ = "1.40.0" # {x-release-please-start-date} -__release_date__ = "2025-03-05" +__release_date__ = "2025-03-11" # {x-release-please-end}
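
A minimal usage sketch of the JSON dtype change introduced in the "reading JSON data as a custom arrow extension type" patch and released in 1.40.0: JSON columns are now typed as pandas ArrowDtype(db_dtypes.JSONArrowType()) (exposed as bigframes.dtypes.JSON_DTYPE), and values round-trip as JSON strings rather than unboxed Python objects. This is an illustrative sketch, not part of the patches above; it assumes a configured BigQuery session, bigframes >= 1.40.0, and db-dtypes >= 1.4.2, and the query and column name are examples only.

    import db_dtypes  # type: ignore
    import pandas as pd

    import bigframes.dtypes as dtypes
    import bigframes.pandas as bpd

    # JSON columns now surface as the Arrow extension dtype instead of db_dtypes.JSONDtype().
    df = bpd.read_gbq("SELECT JSON_OBJECT('foo', 10, 'bar', TRUE) AS json_col")
    assert df.dtypes["json_col"] == pd.ArrowDtype(db_dtypes.JSONArrowType())
    assert df.dtypes["json_col"] == dtypes.JSON_DTYPE  # same dtype, exposed as a constant

    # Values come back as JSON strings rather than dicts/lists/scalars.
    assert df.to_pandas()["json_col"][0] == '{"bar":true,"foo":10}'

    # Local construction also takes JSON strings.
    s = bpd.Series(['{"a": 10}', None], dtype=dtypes.JSON_DTYPE)

Code that previously matched on db_dtypes.JSONDtype() or indexed into decoded dict/list values needs the same kind of update shown in the test changes above (string comparison against the serialized JSON).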