From 796fc3ed59bc0bd79f566ce2bc604f4cf2079fb3 Mon Sep 17 00:00:00 2001
From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com>
Date: Mon, 6 Jan 2025 11:23:09 -0800
Subject: [PATCH 01/22] chore: add experimental blob.image_blur function
 (#1256)

* chore: add experimental blob.image_blur function

* apply to obj_ref

* docs

* fix mypy
---
 bigframes/blob/_functions.py                 | 130 +++++++++++++++++++
 bigframes/core/compile/scalar_op_compiler.py |  12 ++
 bigframes/operations/__init__.py             |  17 +++
 bigframes/operations/blob.py                 |  51 ++++++++
 4 files changed, 210 insertions(+)
 create mode 100644 bigframes/blob/_functions.py

diff --git a/bigframes/blob/_functions.py b/bigframes/blob/_functions.py
new file mode 100644
index 0000000000..4b3841252c
--- /dev/null
+++ b/bigframes/blob/_functions.py
@@ -0,0 +1,130 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+import inspect
+from typing import Callable, Iterable
+
+import google.cloud.bigquery as bigquery
+
+import bigframes
+import bigframes.session._io.bigquery as bf_io_bigquery
+
+_PYTHON_TO_BQ_TYPES = {int: "INT64", float: "FLOAT64", str: "STRING", bytes: "BYTES"}
+
+
+@dataclass(frozen=True)
+class FunctionDef:
+    """Definition of a Python UDF."""
+
+    func: Callable  # function body
+    requirements: Iterable[str]  # required packages
+
+
+# TODO(garrettwu): migrate to bigframes UDF when it is available
+class TransformFunction:
+    """Simple transform function class to deal with Python UDF."""
+
+    def __init__(
+        self, func_def: FunctionDef, session: bigframes.Session, connection: str
+    ):
+        self._func = func_def.func
+        self._requirements = func_def.requirements
+        self._session = session
+        self._connection = connection
+
+    def _input_bq_signature(self):
+        sig = inspect.signature(self._func)
+        inputs = []
+        for k, v in sig.parameters.items():
+            inputs.append(f"{k} {_PYTHON_TO_BQ_TYPES[v.annotation]}")
+        return ", ".join(inputs)
+
+    def _output_bq_type(self):
+        sig = inspect.signature(self._func)
+        return _PYTHON_TO_BQ_TYPES[sig.return_annotation]
+
+    def _create_udf(self):
+        """Create Python UDF in BQ. Return name of the UDF."""
+        udf_name = str(self._session._loader._storage_manager._random_table())
+
+        func_body = inspect.getsource(self._func)
+        func_name = self._func.__name__
+        packages = str(list(self._requirements))
+
+        sql = f"""
+CREATE OR REPLACE FUNCTION `{udf_name}`({self._input_bq_signature()})
+RETURNS {self._output_bq_type()} LANGUAGE python
+WITH CONNECTION `{self._connection}`
+OPTIONS (entry_point='{func_name}', runtime_version='python-3.11', packages={packages})
+AS r\"\"\"
+
+
+{func_body}
+
+
+\"\"\"
+        """
+
+        bf_io_bigquery.start_query_with_client(
+            self._session.bqclient,
+            sql,
+            job_config=bigquery.QueryJobConfig(),
+            metrics=self._session._metrics,
+        )
+
+        return udf_name
+
+    def udf(self):
+        """Create and return the UDF object."""
+        udf_name = self._create_udf()
+        return self._session.read_gbq_function(udf_name)
+
+
+# Blur images. Takes ObjectRefRuntime as JSON string. Outputs ObjectRefRuntime JSON string.
+def image_blur_func(
+    src_obj_ref_rt: str, dst_obj_ref_rt: str, ksize_x: int, ksize_y: int
+) -> str:
+    import json
+
+    import cv2 as cv  # type: ignore
+    import numpy as np
+    import requests
+
+    src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
+    dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt)
+
+    src_url = src_obj_ref_rt_json["access_urls"]["read_url"]
+    dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"]
+
+    response = requests.get(src_url)
+    bts = response.content
+
+    nparr = np.frombuffer(bts, np.uint8)
+    img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
+    img_blurred = cv.blur(img, ksize=(ksize_x, ksize_y))
+    bts = cv.imencode(".jpeg", img_blurred)[1].tobytes()
+
+    requests.put(
+        url=dst_url,
+        data=bts,
+        headers={
+            "Content-Type": "image/jpeg",
+        },
+    )
+
+    return dst_obj_ref_rt
+
+
+image_blur_def = FunctionDef(image_blur_func, ["opencv-python", "numpy", "requests"])
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index d824009fec..2b85a97483 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -1210,6 +1210,11 @@ def json_extract_string_array_op_impl(
     return json_extract_string_array(json_obj=x, json_path=op.json_path)
 
 
+@scalar_op_compiler.register_unary_op(ops.ToJSONString)
+def to_json_string_op_impl(json_obj: ibis_types.Value):
+    return to_json_string(json_obj=json_obj)
+
+
 # Blob Ops
 @scalar_op_compiler.register_unary_op(ops.obj_fetch_metadata_op)
 def obj_fetch_metadata_op_impl(obj_ref: ibis_types.Value):
@@ -1909,6 +1914,13 @@ def json_extract_string_array(  # type: ignore[empty-body]
     """Extracts a JSON array and converts it to a SQL ARRAY of STRINGs."""
 
 
+@ibis_udf.scalar.builtin(name="to_json_string")
+def to_json_string(  # type: ignore[empty-body]
+    json_obj: ibis_dtypes.JSON,
+) -> ibis_dtypes.String:
+    """Convert JSON to STRING."""
+
+
 @ibis_udf.scalar.builtin(name="ML.DISTANCE")
 def vector_distance(vector1, vector2, type: str) -> ibis_dtypes.Float64:  # type: ignore[empty-body]
     """Computes the distance between two vectors using specified type ("EUCLIDEAN", "MANHATTAN", or "COSINE")"""
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index 2884d56551..03d9d60d5f 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -740,6 +740,23 @@ def output_type(self, *input_types):
         )
 
 
+@dataclasses.dataclass(frozen=True)
+class ToJSONString(UnaryOp):
+    name: typing.ClassVar[str] = "to_json_string"
+
+    def output_type(self, *input_types):
+        input_type = input_types[0]
+        if not dtypes.is_json_like(input_type):
+            raise TypeError(
+                "Input type must be an valid JSON object or JSON-formatted string type."
+                + f" Received type: {input_type}"
+            )
+        return dtypes.STRING_DTYPE
+
+
+to_json_string_op = ToJSONString()
+
+
 ## Blob Ops
 @dataclasses.dataclass(frozen=True)
 class ObjGetAccessUrl(UnaryOp):
diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py
index c074c72971..898d56ab83 100644
--- a/bigframes/operations/blob.py
+++ b/bigframes/operations/blob.py
@@ -14,9 +14,12 @@
 
 from __future__ import annotations
 
+from typing import Optional
+
 import IPython.display as ipy_display
 import requests
 
+from bigframes import clients
 from bigframes.operations import base
 import bigframes.operations as ops
 import bigframes.series
@@ -66,3 +69,51 @@ def display(self, n: int = 3):
             read_url = str(read_url).strip('"')
             response = requests.get(read_url)
             ipy_display.display(ipy_display.Image(response.content))
+
+    def image_blur(
+        self,
+        ksize: tuple[int, int],
+        *,
+        dst: bigframes.series.Series,
+        connection: Optional[str] = None,
+    ) -> bigframes.series.Series:
+        """Blurs images.
+
+        .. note::
+            BigFrames Blob is still experimental. It may not work and is subject to change in the future.
+
+        Args:
+            ksize (tuple(int, int)): Kernel size.
+            dst (bigframes.series.Series): Destination blob series.
+            connection (str or None, default None): BQ connection used for internet transactions. If None, uses default connection of the session.
+
+        Returns:
+            JSON: Runtime info of the Blob.
+        """
+        import bigframes.blob._functions as blob_func
+
+        connection = connection or self._block.session._bq_connection
+        connection = clients.resolve_full_bq_connection_name(
+            connection,
+            default_project=self._block.session._project,
+            default_location=self._block.session._location,
+        )
+
+        image_blur_udf = blob_func.TransformFunction(
+            blob_func.image_blur_def,
+            session=self._block.session,
+            connection=connection,
+        ).udf()
+
+        src_rt = bigframes.series.Series(self._block)._apply_unary_op(
+            ops.ObjGetAccessUrl(mode="R")
+        )
+        dst_rt = dst._apply_unary_op(ops.ObjGetAccessUrl(mode="RW"))
+
+        src_rt = src_rt._apply_unary_op(ops.to_json_string_op)
+        dst_rt = dst_rt._apply_unary_op(ops.to_json_string_op)
+
+        df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer")
+        df["ksize_x"], df["ksize_y"] = ksize
+
+        return df.apply(image_blur_udf, axis=1)

From 8077ff49426b103dc5a52eeb86a2c6a869c99825 Mon Sep 17 00:00:00 2001
From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com>
Date: Mon, 6 Jan 2025 13:55:19 -0800
Subject: [PATCH 02/22] feat: add max_retries to TextEmbeddingGenerator and
 Claude3TextGenerator (#1259)

* chore: fix wording of Gemini max_retries

* feat: add max_retries to TextEmbeddingGenerator and Claude3TextGenerator

---------

Co-authored-by: Shuowei Li <shuowei.l@outlook.com>
---
 bigframes/ml/base.py              |  64 ++++++-
 bigframes/ml/llm.py               | 109 ++++++-----
 tests/system/small/ml/test_llm.py | 288 +++++++++++++++++++++++++++---
 3 files changed, 369 insertions(+), 92 deletions(-)

diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py
index 4058647adb..a2c122f8c7 100644
--- a/bigframes/ml/base.py
+++ b/bigframes/ml/base.py
@@ -22,7 +22,8 @@
 """
 
 import abc
-from typing import cast, Optional, TypeVar
+from typing import Callable, cast, Mapping, Optional, TypeVar
+import warnings
 
 import bigframes_vendored.sklearn.base
 
@@ -77,6 +78,9 @@ def fit_transform(self, x_train: Union[DataFrame, Series], y_train: Union[DataFr
                 ...
     """
 
+    def __init__(self):
+        self._bqml_model: Optional[core.BqmlModel] = None
+
     def __repr__(self):
         """Print the estimator's constructor with all non-default parameter values."""
 
@@ -95,9 +99,6 @@ def __repr__(self):
 class Predictor(BaseEstimator):
     """A BigQuery DataFrames ML Model base class that can be used to predict outputs."""
 
-    def __init__(self):
-        self._bqml_model: Optional[core.BqmlModel] = None
-
     @abc.abstractmethod
     def predict(self, X):
         pass
@@ -213,12 +214,61 @@ def fit(
         return self._fit(X, y)
 
 
+class RetriableRemotePredictor(BaseEstimator):
+    @property
+    @abc.abstractmethod
+    def _predict_func(self) -> Callable[[bpd.DataFrame, Mapping], bpd.DataFrame]:
+        pass
+
+    @property
+    @abc.abstractmethod
+    def _status_col(self) -> str:
+        pass
+
+    def _predict_and_retry(
+        self, X: bpd.DataFrame, options: Mapping, max_retries: int
+    ) -> bpd.DataFrame:
+        assert self._bqml_model is not None
+
+        df_result = bpd.DataFrame(session=self._bqml_model.session)  # placeholder
+        df_fail = X
+        for _ in range(max_retries + 1):
+            df = self._predict_func(df_fail, options)
+
+            success = df[self._status_col].str.len() == 0
+            df_succ = df[success]
+            df_fail = df[~success]
+
+            if df_succ.empty:
+                if max_retries > 0:
+                    warnings.warn(
+                        "Can't make any progress, stop retrying.", RuntimeWarning
+                    )
+                break
+
+            df_result = (
+                bpd.concat([df_result, df_succ]) if not df_result.empty else df_succ
+            )
+
+            if df_fail.empty:
+                break
+
+        if not df_fail.empty:
+            warnings.warn(
+                f"Some predictions failed. Check column {self._status_col} for detailed status. You may want to filter the failed rows and retry.",
+                RuntimeWarning,
+            )
+
+        df_result = cast(
+            bpd.DataFrame,
+            bpd.concat([df_result, df_fail]) if not df_result.empty else df_fail,
+        )
+        return df_result
+
+
 class BaseTransformer(BaseEstimator):
     """Transformer base class."""
 
-    def __init__(self):
-        self._bqml_model: Optional[core.BqmlModel] = None
-
     @abc.abstractmethod
     def _keys(self):
         pass
diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py
index d42138b006..e6825f80bb 100644
--- a/bigframes/ml/llm.py
+++ b/bigframes/ml/llm.py
@@ -16,7 +16,7 @@
 
 from __future__ import annotations
 
-from typing import cast, Literal, Optional
+from typing import Callable, cast, Literal, Mapping, Optional
 import warnings
 
 import bigframes_vendored.constants as constants
@@ -616,7 +616,7 @@ def to_gbq(
 
 
 @log_adapter.class_logger
-class TextEmbeddingGenerator(base.BaseEstimator):
+class TextEmbeddingGenerator(base.RetriableRemotePredictor):
     """Text embedding generator LLM model.
 
     Args:
@@ -715,18 +715,33 @@ def _from_bq(
         model._bqml_model = core.BqmlModel(session, bq_model)
         return model
 
-    def predict(self, X: utils.ArrayType) -> bpd.DataFrame:
+    @property
+    def _predict_func(self) -> Callable[[bpd.DataFrame, Mapping], bpd.DataFrame]:
+        return self._bqml_model.generate_embedding
+
+    @property
+    def _status_col(self) -> str:
+        return _ML_GENERATE_EMBEDDING_STATUS
+
+    def predict(self, X: utils.ArrayType, *, max_retries: int = 0) -> bpd.DataFrame:
         """Predict the result from input DataFrame.
 
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
                 Input DataFrame or Series, can contain one or more columns. If multiple columns are in the DataFrame, it must contain a "content" column for prediction.
 
+            max_retries (int, default 0):
+                Max number of retries if the prediction for any rows failed. Each try needs to make progress (i.e. has successfully predicted rows) to continue the retry.
+                Each retry will append newly succeeded rows. When the max retries are reached, the remaining rows (the ones without successful predictions) will be appended to the end of the result.
+
         Returns:
             bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values.
         """
+        if max_retries < 0:
+            raise ValueError(
+                f"max_retries must be larger than or equal to 0, but is {max_retries}."
+            )
 
-        # Params reference: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models
         (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session)
 
         if len(X.columns) == 1:
@@ -738,15 +753,7 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame:
             "flatten_json_output": True,
         }
 
-        df = self._bqml_model.generate_embedding(X, options)
-
-        if (df[_ML_GENERATE_EMBEDDING_STATUS] != "").any():
-            warnings.warn(
-                f"Some predictions failed. Check column {_ML_GENERATE_EMBEDDING_STATUS} for detailed status. You may want to filter the failed rows and retry.",
-                RuntimeWarning,
-            )
-
-        return df
+        return self._predict_and_retry(X, options=options, max_retries=max_retries)
 
     def to_gbq(self, model_name: str, replace: bool = False) -> TextEmbeddingGenerator:
         """Save the model to BigQuery.
@@ -765,7 +772,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> TextEmbeddingGenerat
 
 
 @log_adapter.class_logger
-class GeminiTextGenerator(base.BaseEstimator):
+class GeminiTextGenerator(base.RetriableRemotePredictor):
     """Gemini text generator LLM model.
 
     Args:
@@ -891,6 +898,14 @@ def _bqml_options(self) -> dict:
         }
         return options
 
+    @property
+    def _predict_func(self) -> Callable[[bpd.DataFrame, Mapping], bpd.DataFrame]:
+        return self._bqml_model.generate_text
+
+    @property
+    def _status_col(self) -> str:
+        return _ML_GENERATE_TEXT_STATUS
+
     def fit(
         self,
         X: utils.ArrayType,
@@ -1028,41 +1043,7 @@ def predict(
             "ground_with_google_search": ground_with_google_search,
         }
 
-        df_result = bpd.DataFrame(session=self._bqml_model.session)  # placeholder
-        df_fail = X
-        for _ in range(max_retries + 1):
-            df = self._bqml_model.generate_text(df_fail, options)
-
-            success = df[_ML_GENERATE_TEXT_STATUS].str.len() == 0
-            df_succ = df[success]
-            df_fail = df[~success]
-
-            if df_succ.empty:
-                if max_retries > 0:
-                    warnings.warn(
-                        "Can't make any progress, stop retrying.", RuntimeWarning
-                    )
-                break
-
-            df_result = (
-                bpd.concat([df_result, df_succ]) if not df_result.empty else df_succ
-            )
-
-            if df_fail.empty:
-                break
-
-        if not df_fail.empty:
-            warnings.warn(
-                f"Some predictions failed. Check column {_ML_GENERATE_TEXT_STATUS} for detailed status. You may want to filter the failed rows and retry.",
-                RuntimeWarning,
-            )
-
-        df_result = cast(
-            bpd.DataFrame,
-            bpd.concat([df_result, df_fail]) if not df_result.empty else df_fail,
-        )
-
-        return df_result
+        return self._predict_and_retry(X, options=options, max_retries=max_retries)
 
     def score(
         self,
@@ -1144,7 +1125,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> GeminiTextGenerator:
 
 
 @log_adapter.class_logger
-class Claude3TextGenerator(base.BaseEstimator):
+class Claude3TextGenerator(base.RetriableRemotePredictor):
     """Claude3 text generator LLM model.
 
     Go to Google Cloud Console -> Vertex AI -> Model Garden page to enabe the models before use. Must have the Consumer Procurement Entitlement Manager Identity and Access Management (IAM) role to enable the models.
@@ -1273,6 +1254,14 @@ def _bqml_options(self) -> dict:
         }
         return options
 
+    @property
+    def _predict_func(self) -> Callable[[bpd.DataFrame, Mapping], bpd.DataFrame]:
+        return self._bqml_model.generate_text
+
+    @property
+    def _status_col(self) -> str:
+        return _ML_GENERATE_TEXT_STATUS
+
     def predict(
         self,
         X: utils.ArrayType,
@@ -1280,6 +1269,7 @@ def predict(
         max_output_tokens: int = 128,
         top_k: int = 40,
         top_p: float = 0.95,
+        max_retries: int = 0,
     ) -> bpd.DataFrame:
         """Predict the result from input DataFrame.
 
@@ -1307,6 +1297,10 @@ def predict(
                 Specify a lower value for less random responses and a higher value for more random responses.
                 Default 0.95. Possible values [0.0, 1.0].
 
+            max_retries (int, default 0):
+                Max number of retries if the prediction for any rows failed. Each try needs to make progress (i.e. has successfully predicted rows) to continue the retry.
+                Each retry will append newly succeeded rows. When the max retries are reached, the remaining rows (the ones without successful predictions) will be appended to the end of the result.
+
 
         Returns:
             bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values.
@@ -1324,6 +1318,11 @@ def predict(
         if top_p < 0.0 or top_p > 1.0:
             raise ValueError(f"top_p must be [0.0, 1.0], but is {top_p}.")
 
+        if max_retries < 0:
+            raise ValueError(
+                f"max_retries must be larger than or equal to 0, but is {max_retries}."
+            )
+
         (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session)
 
         if len(X.columns) == 1:
@@ -1338,15 +1337,7 @@ def predict(
             "flatten_json_output": True,
         }
 
-        df = self._bqml_model.generate_text(X, options)
-
-        if (df[_ML_GENERATE_TEXT_STATUS] != "").any():
-            warnings.warn(
-                f"Some predictions failed. Check column {_ML_GENERATE_TEXT_STATUS} for detailed status. You may want to filter the failed rows and retry.",
-                RuntimeWarning,
-            )
-
-        return df
+        return self._predict_and_retry(X, options=options, max_retries=max_retries)
 
     def to_gbq(self, model_name: str, replace: bool = False) -> Claude3TextGenerator:
         """Save the model to BigQuery.
diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py
index 304204cc7b..29f504443a 100644
--- a/tests/system/small/ml/test_llm.py
+++ b/tests/system/small/ml/test_llm.py
@@ -381,7 +381,35 @@ def __eq__(self, other):
         return self.equals(other)
 
 
-def test_gemini_text_generator_retry_success(session, bq_connection):
+@pytest.mark.parametrize(
+    (
+        "model_class",
+        "options",
+    ),
+    [
+        (
+            llm.GeminiTextGenerator,
+            {
+                "temperature": 0.9,
+                "max_output_tokens": 8192,
+                "top_k": 40,
+                "top_p": 1.0,
+                "flatten_json_output": True,
+                "ground_with_google_search": False,
+            },
+        ),
+        (
+            llm.Claude3TextGenerator,
+            {
+                "max_output_tokens": 128,
+                "top_k": 40,
+                "top_p": 0.95,
+                "flatten_json_output": True,
+            },
+        ),
+    ],
+)
+def test_text_generator_retry_success(session, bq_connection, model_class, options):
     # Requests.
     df0 = EqCmpAllDataFrame(
         {
@@ -455,22 +483,12 @@ def test_gemini_text_generator_retry_success(session, bq_connection):
             session=session,
         ),
     ]
-    options = {
-        "temperature": 0.9,
-        "max_output_tokens": 8192,
-        "top_k": 40,
-        "top_p": 1.0,
-        "flatten_json_output": True,
-        "ground_with_google_search": False,
-    }
 
-    gemini_text_generator_model = llm.GeminiTextGenerator(
-        connection_name=bq_connection, session=session
-    )
-    gemini_text_generator_model._bqml_model = mock_bqml_model
+    text_generator_model = model_class(connection_name=bq_connection, session=session)
+    text_generator_model._bqml_model = mock_bqml_model
 
     # 3rd retry isn't triggered
-    result = gemini_text_generator_model.predict(df0, max_retries=3)
+    result = text_generator_model.predict(df0, max_retries=3)
 
     mock_bqml_model.generate_text.assert_has_calls(
         [
@@ -497,7 +515,35 @@ def test_gemini_text_generator_retry_success(session, bq_connection):
     )
 
 
-def test_gemini_text_generator_retry_no_progress(session, bq_connection):
+@pytest.mark.parametrize(
+    (
+        "model_class",
+        "options",
+    ),
+    [
+        (
+            llm.GeminiTextGenerator,
+            {
+                "temperature": 0.9,
+                "max_output_tokens": 8192,
+                "top_k": 40,
+                "top_p": 1.0,
+                "flatten_json_output": True,
+                "ground_with_google_search": False,
+            },
+        ),
+        (
+            llm.Claude3TextGenerator,
+            {
+                "max_output_tokens": 128,
+                "top_k": 40,
+                "top_p": 0.95,
+                "flatten_json_output": True,
+            },
+        ),
+    ],
+)
+def test_text_generator_retry_no_progress(session, bq_connection, model_class, options):
     # Requests.
     df0 = EqCmpAllDataFrame(
         {
@@ -550,24 +596,214 @@ def test_gemini_text_generator_retry_no_progress(session, bq_connection):
             session=session,
         ),
     ]
+
+    text_generator_model = model_class(connection_name=bq_connection, session=session)
+    text_generator_model._bqml_model = mock_bqml_model
+
+    # No progress, only conduct retry once
+    result = text_generator_model.predict(df0, max_retries=3)
+
+    mock_bqml_model.generate_text.assert_has_calls(
+        [
+            mock.call(df0, options),
+            mock.call(df1, options),
+        ]
+    )
+    pd.testing.assert_frame_equal(
+        result.to_pandas(),
+        pd.DataFrame(
+            {
+                "ml_generate_text_status": ["", "error", "error"],
+                "prompt": [
+                    "What is BigQuery?",
+                    "What is BQML?",
+                    "What is BigQuery DataFrame?",
+                ],
+            },
+            index=[0, 1, 2],
+        ),
+        check_dtype=False,
+        check_index_type=False,
+    )
+
+
+def test_text_embedding_generator_retry_success(session, bq_connection):
+    # Requests.
+    df0 = EqCmpAllDataFrame(
+        {
+            "content": [
+                "What is BigQuery?",
+                "What is BQML?",
+                "What is BigQuery DataFrame?",
+            ]
+        },
+        index=[0, 1, 2],
+        session=session,
+    )
+    df1 = EqCmpAllDataFrame(
+        {
+            "ml_generate_embedding_status": ["error", "error"],
+            "content": [
+                "What is BQML?",
+                "What is BigQuery DataFrame?",
+            ],
+        },
+        index=[1, 2],
+        session=session,
+    )
+    df2 = EqCmpAllDataFrame(
+        {
+            "ml_generate_embedding_status": ["error"],
+            "content": [
+                "What is BQML?",
+            ],
+        },
+        index=[1],
+        session=session,
+    )
+
+    mock_bqml_model = mock.create_autospec(spec=core.BqmlModel)
+    type(mock_bqml_model).session = mock.PropertyMock(return_value=session)
+
+    # Responses. Retry twice then all succeeded.
+    mock_bqml_model.generate_embedding.side_effect = [
+        EqCmpAllDataFrame(
+            {
+                "ml_generate_embedding_status": ["", "error", "error"],
+                "content": [
+                    "What is BigQuery?",
+                    "What is BQML?",
+                    "What is BigQuery DataFrame?",
+                ],
+            },
+            index=[0, 1, 2],
+            session=session,
+        ),
+        EqCmpAllDataFrame(
+            {
+                "ml_generate_embedding_status": ["error", ""],
+                "content": [
+                    "What is BQML?",
+                    "What is BigQuery DataFrame?",
+                ],
+            },
+            index=[1, 2],
+            session=session,
+        ),
+        EqCmpAllDataFrame(
+            {
+                "ml_generate_embedding_status": [""],
+                "content": [
+                    "What is BQML?",
+                ],
+            },
+            index=[1],
+            session=session,
+        ),
+    ]
     options = {
-        "temperature": 0.9,
-        "max_output_tokens": 8192,
-        "top_k": 40,
-        "top_p": 1.0,
         "flatten_json_output": True,
-        "ground_with_google_search": False,
     }
 
-    gemini_text_generator_model = llm.GeminiTextGenerator(
+    text_embedding_model = llm.TextEmbeddingGenerator(
+        connection_name=bq_connection, session=session
+    )
+    text_embedding_model._bqml_model = mock_bqml_model
+
+    # 3rd retry isn't triggered
+    result = text_embedding_model.predict(df0, max_retries=3)
+
+    mock_bqml_model.generate_embedding.assert_has_calls(
+        [
+            mock.call(df0, options),
+            mock.call(df1, options),
+            mock.call(df2, options),
+        ]
+    )
+    pd.testing.assert_frame_equal(
+        result.to_pandas(),
+        pd.DataFrame(
+            {
+                "ml_generate_embedding_status": ["", "", ""],
+                "content": [
+                    "What is BigQuery?",
+                    "What is BigQuery DataFrame?",
+                    "What is BQML?",
+                ],
+            },
+            index=[0, 2, 1],
+        ),
+        check_dtype=False,
+        check_index_type=False,
+    )
+
+
+def test_text_embedding_generator_retry_no_progress(session, bq_connection):
+    # Requests.
+    df0 = EqCmpAllDataFrame(
+        {
+            "content": [
+                "What is BigQuery?",
+                "What is BQML?",
+                "What is BigQuery DataFrame?",
+            ]
+        },
+        index=[0, 1, 2],
+        session=session,
+    )
+    df1 = EqCmpAllDataFrame(
+        {
+            "ml_generate_embedding_status": ["error", "error"],
+            "content": [
+                "What is BQML?",
+                "What is BigQuery DataFrame?",
+            ],
+        },
+        index=[1, 2],
+        session=session,
+    )
+
+    mock_bqml_model = mock.create_autospec(spec=core.BqmlModel)
+    type(mock_bqml_model).session = mock.PropertyMock(return_value=session)
+    # Responses. Retry once, no progress, just stop.
+    mock_bqml_model.generate_embedding.side_effect = [
+        EqCmpAllDataFrame(
+            {
+                "ml_generate_embedding_status": ["", "error", "error"],
+                "content": [
+                    "What is BigQuery?",
+                    "What is BQML?",
+                    "What is BigQuery DataFrame?",
+                ],
+            },
+            index=[0, 1, 2],
+            session=session,
+        ),
+        EqCmpAllDataFrame(
+            {
+                "ml_generate_embedding_status": ["error", "error"],
+                "content": [
+                    "What is BQML?",
+                    "What is BigQuery DataFrame?",
+                ],
+            },
+            index=[1, 2],
+            session=session,
+        ),
+    ]
+    options = {
+        "flatten_json_output": True,
+    }
+
+    text_embedding_model = llm.TextEmbeddingGenerator(
         connection_name=bq_connection, session=session
     )
-    gemini_text_generator_model._bqml_model = mock_bqml_model
+    text_embedding_model._bqml_model = mock_bqml_model
 
     # No progress, only conduct retry once
-    result = gemini_text_generator_model.predict(df0, max_retries=3)
+    result = text_embedding_model.predict(df0, max_retries=3)
 
-    mock_bqml_model.generate_text.assert_has_calls(
+    mock_bqml_model.generate_embedding.assert_has_calls(
         [
             mock.call(df0, options),
             mock.call(df1, options),
@@ -577,8 +813,8 @@ def test_gemini_text_generator_retry_no_progress(session, bq_connection):
         result.to_pandas(),
         pd.DataFrame(
             {
-                "ml_generate_text_status": ["", "error", "error"],
-                "prompt": [
+                "ml_generate_embedding_status": ["", "error", "error"],
+                "content": [
                     "What is BigQuery?",
                     "What is BQML?",
                     "What is BigQuery DataFrame?",

From bf90741b8cd5bf238c03f86d081f3630f521ef60 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Mon, 6 Jan 2025 15:10:59 -0800
Subject: [PATCH 03/22] test: read_gbq works when table and column share a name
 (#1079)

---
 tests/system/small/test_session.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index eb63a9b72a..f722ccbe75 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -403,6 +403,17 @@ def test_read_gbq_on_linked_dataset_warns(session, source_table):
         assert warned[0].category == bigframes.exceptions.TimeTravelDisabledWarning
 
 
+def test_read_gbq_w_ambigous_name(
+    session: bigframes.Session,
+):
+    # Ensure read_gbq works when table and column share a name
+    df = session.read_gbq(
+        "bigframes-dev.bigframes_tests_sys.ambiguous_name"
+    ).to_pandas()
+    pd_df = pd.DataFrame({"x": [2, 1], "ambiguous_name": [20, 10]})
+    pd.testing.assert_frame_equal(df, pd_df, check_dtype=False, check_index_type=False)
+
+
 def test_read_gbq_table_clustered_with_filter(session: bigframes.Session):
     df = session.read_gbq_table(
         "bigquery-public-data.cloud_storage_geo_index.landsat_index",

From 58f13cb9ef8bac3222e5013d8ae77dd20f886e30 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei.l@outlook.com>
Date: Mon, 6 Jan 2025 19:28:59 -0800
Subject: [PATCH 04/22] docs: add bq studio links that allow users to generate
 Jupyter notebooks in bq studio with github contents (#1266)

* add new bq studio links that allow users to self-import .ipynb files from GitHub

* revert semantic_operators

* add bq studio link for semantic_operators.ipynb

---------

Co-authored-by: Shuowei Li <shuowei@google.com>
---
 .../experimental/semantic_operators.ipynb     | 936 +++++++++---------
 .../bq_dataframes_llm_code_generation.ipynb   |   6 +
 .../bq_dataframes_llm_kmeans.ipynb            |   6 +
 ...q_dataframes_ml_drug_name_generation.ipynb |   6 +
 .../bq_dataframes_template.ipynb              |   7 +
 .../getting_started_bq_dataframes.ipynb       |   6 +
 .../ml_fundamentals_bq_dataframes.ipynb       |   6 +
 .../bq_dataframes_ml_linear_regression.ipynb  |   6 +
 .../remote_function_vertex_claude_model.ipynb |   6 +
 .../bq_dataframes_covid_line_graphs.ipynb     |   6 +
 10 files changed, 526 insertions(+), 465 deletions(-)

diff --git a/notebooks/experimental/semantic_operators.ipynb b/notebooks/experimental/semantic_operators.ipynb
index f9c7f67358..b3b989dd43 100644
--- a/notebooks/experimental/semantic_operators.ipynb
+++ b/notebooks/experimental/semantic_operators.ipynb
@@ -44,6 +44,12 @@
         "      View on GitHub\n",
         "    </a>\n",
         "  </td>\n",
+        "  <td>\n",
+        "    <a href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/experimental/semantic_operators.ipynb\">\n",
+        "      <img src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s\" alt=\"BQ logo\" width=\"35\">\n",
+        "      Open in BQ Studio\n",
+        "    </a>\n",
+        "  </td>\n",
         "</table>"
       ]
     },
@@ -135,24 +141,24 @@
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "Specify your GCP project and location."
-      ],
       "metadata": {
         "id": "W8TPUvnsqxhv"
-      }
+      },
+      "source": [
+        "Specify your GCP project and location."
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "bpd.options.bigquery.project = 'YOUR_PROJECT_ID'\n",
-        "bpd.options.bigquery.location = 'US'"
-      ],
+      "execution_count": null,
       "metadata": {
         "id": "vCkraKOeqJFl"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "bpd.options.bigquery.project = 'YOUR_PROJECT_ID'\n",
+        "bpd.options.bigquery.location = 'US'"
+      ]
     },
     {
       "cell_type": "markdown",
@@ -293,16 +299,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "   country     city\n",
-              "0      USA  Seattle\n",
-              "1  Germany   Berlin\n",
-              "2    Japan    Kyoto\n",
-              "\n",
-              "[3 rows x 2 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -346,10 +343,19 @@
               "</table>\n",
               "<p>3 rows × 2 columns</p>\n",
               "</div>[3 rows x 2 columns in total]"
+            ],
+            "text/plain": [
+              "   country     city\n",
+              "0      USA  Seattle\n",
+              "1  Germany   Berlin\n",
+              "2    Japan    Kyoto\n",
+              "\n",
+              "[3 rows x 2 columns]"
             ]
           },
+          "execution_count": 10,
           "metadata": {},
-          "execution_count": 10
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -384,14 +390,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "   country    city\n",
-              "1  Germany  Berlin\n",
-              "\n",
-              "[1 rows x 2 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -425,10 +424,17 @@
               "</table>\n",
               "<p>1 rows × 2 columns</p>\n",
               "</div>[1 rows x 2 columns in total]"
+            ],
+            "text/plain": [
+              "   country    city\n",
+              "1  Germany  Berlin\n",
+              "\n",
+              "[1 rows x 2 columns]"
             ]
           },
+          "execution_count": 11,
           "metadata": {},
-          "execution_count": 11
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -477,16 +483,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "  ingredient_1 ingredient_2\n",
-              "0          Bun   Beef Patty\n",
-              "1     Soy Bean      Bittern\n",
-              "2      Sausage   Long Bread\n",
-              "\n",
-              "[3 rows x 2 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -530,10 +527,19 @@
               "</table>\n",
               "<p>3 rows × 2 columns</p>\n",
               "</div>[3 rows x 2 columns in total]"
+            ],
+            "text/plain": [
+              "  ingredient_1 ingredient_2\n",
+              "0          Bun   Beef Patty\n",
+              "1     Soy Bean      Bittern\n",
+              "2      Sausage   Long Bread\n",
+              "\n",
+              "[3 rows x 2 columns]"
             ]
           },
+          "execution_count": 12,
           "metadata": {},
-          "execution_count": 12
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -566,19 +572,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "  ingredient_1 ingredient_2      food\n",
-              "0          Bun   Beef Patty  Burger \n",
-              "\n",
-              "1     Soy Bean      Bittern    Tofu \n",
-              "\n",
-              "2      Sausage   Long Bread  Hotdog \n",
-              "\n",
-              "\n",
-              "[3 rows x 3 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -626,10 +620,22 @@
               "</table>\n",
               "<p>3 rows × 3 columns</p>\n",
               "</div>[3 rows x 3 columns in total]"
+            ],
+            "text/plain": [
+              "  ingredient_1 ingredient_2      food\n",
+              "0          Bun   Beef Patty  Burger \n",
+              "\n",
+              "1     Soy Bean      Bittern    Tofu \n",
+              "\n",
+              "2      Sausage   Long Bread  Hotdog \n",
+              "\n",
+              "\n",
+              "[3 rows x 3 columns]"
             ]
           },
+          "execution_count": 13,
           "metadata": {},
-          "execution_count": 13
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -690,17 +696,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "        city      continent\n",
-              "0    Seattle  North America\n",
-              "1     Ottawa  North America\n",
-              "2   Shanghai           Asia\n",
-              "3  New Delhi           Asia\n",
-              "\n",
-              "[4 rows x 2 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -749,10 +745,20 @@
               "</table>\n",
               "<p>4 rows × 2 columns</p>\n",
               "</div>[4 rows x 2 columns in total]"
+            ],
+            "text/plain": [
+              "        city      continent\n",
+              "0    Seattle  North America\n",
+              "1     Ottawa  North America\n",
+              "2   Shanghai           Asia\n",
+              "3  New Delhi           Asia\n",
+              "\n",
+              "[4 rows x 2 columns]"
             ]
           },
+          "execution_count": 15,
           "metadata": {},
-          "execution_count": 15
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -821,19 +827,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "  animal_left animal_right\n",
-              "0         cow          cat\n",
-              "1         cow       spider\n",
-              "2         cat       spider\n",
-              "3    elephant          cow\n",
-              "4    elephant          cat\n",
-              "5    elephant       spider\n",
-              "\n",
-              "[6 rows x 2 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -892,10 +886,22 @@
               "</table>\n",
               "<p>6 rows × 2 columns</p>\n",
               "</div>[6 rows x 2 columns in total]"
+            ],
+            "text/plain": [
+              "  animal_left animal_right\n",
+              "0         cow          cat\n",
+              "1         cow       spider\n",
+              "2         cat       spider\n",
+              "3    elephant          cow\n",
+              "4    elephant          cat\n",
+              "5    elephant       spider\n",
+              "\n",
+              "[6 rows x 2 columns]"
             ]
           },
+          "execution_count": 17,
           "metadata": {},
-          "execution_count": 17
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -935,20 +941,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "                       Movies\n",
-              "0                     Titanic\n",
-              "1     The Wolf of Wall Street\n",
-              "2  Killers of the Flower Moon\n",
-              "3                The Revenant\n",
-              "4                   Inception\n",
-              "5              Shuttle Island\n",
-              "6            The Great Gatsby\n",
-              "\n",
-              "[7 rows x 1 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -1004,10 +997,23 @@
               "</table>\n",
               "<p>7 rows × 1 columns</p>\n",
               "</div>[7 rows x 1 columns in total]"
+            ],
+            "text/plain": [
+              "                       Movies\n",
+              "0                     Titanic\n",
+              "1     The Wolf of Wall Street\n",
+              "2  Killers of the Flower Moon\n",
+              "3                The Revenant\n",
+              "4                   Inception\n",
+              "5              Shuttle Island\n",
+              "6            The Great Gatsby\n",
+              "\n",
+              "[7 rows x 1 columns]"
             ]
           },
+          "execution_count": 18,
           "metadata": {},
-          "execution_count": 18
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -1046,7 +1052,6 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
             "text/plain": [
               "0    Titanic \n",
@@ -1054,8 +1059,9 @@
               "Name: Movies, dtype: string"
             ]
           },
+          "execution_count": 19,
           "metadata": {},
-          "execution_count": 19
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -1123,15 +1129,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "      Animals\n",
-              "0       Corgi\n",
-              "1  Orange Cat\n",
-              "\n",
-              "[2 rows x 1 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -1167,10 +1165,18 @@
               "</table>\n",
               "<p>2 rows × 1 columns</p>\n",
               "</div>[2 rows x 1 columns in total]"
+            ],
+            "text/plain": [
+              "      Animals\n",
+              "0       Corgi\n",
+              "1  Orange Cat\n",
+              "\n",
+              "[2 rows x 1 columns]"
             ]
           },
+          "execution_count": 21,
           "metadata": {},
-          "execution_count": 21
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -1217,18 +1223,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "    creatures\n",
-              "0      salmon\n",
-              "1  sea urchin\n",
-              "2     baboons\n",
-              "3        frog\n",
-              "4  chimpanzee\n",
-              "\n",
-              "[5 rows x 1 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -1276,10 +1271,21 @@
               "</table>\n",
               "<p>5 rows × 1 columns</p>\n",
               "</div>[5 rows x 1 columns in total]"
+            ],
+            "text/plain": [
+              "    creatures\n",
+              "0      salmon\n",
+              "1  sea urchin\n",
+              "2     baboons\n",
+              "3        frog\n",
+              "4  chimpanzee\n",
+              "\n",
+              "[5 rows x 1 columns]"
             ]
           },
+          "execution_count": 22,
           "metadata": {},
-          "execution_count": 22
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -1309,15 +1315,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "    creatures  similarity score\n",
-              "2     baboons          0.708434\n",
-              "4  chimpanzee          0.635844\n",
-              "\n",
-              "[2 rows x 2 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -1356,10 +1354,18 @@
               "</table>\n",
               "<p>2 rows × 2 columns</p>\n",
               "</div>[2 rows x 2 columns in total]"
+            ],
+            "text/plain": [
+              "    creatures  similarity score\n",
+              "2     baboons          0.708434\n",
+              "4  chimpanzee          0.635844\n",
+              "\n",
+              "[2 rows x 2 columns]"
             ]
           },
+          "execution_count": 23,
           "metadata": {},
-          "execution_count": 23
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -1429,18 +1435,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "    animal  animal_1  distance\n",
-              "0   monkey    baboon  0.620521\n",
-              "1   spider  scorpion  0.728024\n",
-              "2   salmon      tuna  0.782141\n",
-              "3  giraffe  elephant    0.7135\n",
-              "4  sparrow       owl  0.810864\n",
-              "\n",
-              "[5 rows x 3 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -1500,10 +1495,21 @@
               "</table>\n",
               "<p>5 rows × 3 columns</p>\n",
               "</div>[5 rows x 3 columns in total]"
+            ],
+            "text/plain": [
+              "    animal  animal_1  distance\n",
+              "0   monkey    baboon  0.620521\n",
+              "1   spider  scorpion  0.728024\n",
+              "2   salmon      tuna  0.782141\n",
+              "3  giraffe  elephant    0.7135\n",
+              "4  sparrow       owl  0.810864\n",
+              "\n",
+              "[5 rows x 3 columns]"
             ]
           },
+          "execution_count": 25,
           "metadata": {},
-          "execution_count": 25
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -1570,18 +1576,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "        Product  Cluster ID\n",
-              "0    Smartphone           1\n",
-              "1        Laptop           1\n",
-              "2  Coffee Maker           1\n",
-              "3       T-shirt           1\n",
-              "4         Jeans           1\n",
-              "\n",
-              "[5 rows x 2 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -1635,10 +1630,21 @@
               "</table>\n",
               "<p>5 rows × 2 columns</p>\n",
               "</div>[5 rows x 2 columns in total]"
+            ],
+            "text/plain": [
+              "        Product  Cluster ID\n",
+              "0    Smartphone           1\n",
+              "1        Laptop           1\n",
+              "2  Coffee Maker           1\n",
+              "3       T-shirt           1\n",
+              "4         Jeans           1\n",
+              "\n",
+              "[5 rows x 2 columns]"
             ]
           },
+          "execution_count": 27,
           "metadata": {},
-          "execution_count": 27
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -1687,93 +1693,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "                                                title  \\\n",
-              "0                                                <NA>   \n",
-              "1                                                <NA>   \n",
-              "2                                                <NA>   \n",
-              "3                                                <NA>   \n",
-              "4                                                <NA>   \n",
-              "5                                                <NA>   \n",
-              "6   The Impending NY Tech Apocalypse: Here's What ...   \n",
-              "7                                                <NA>   \n",
-              "8   Eureca beta is live. A place for your business...   \n",
-              "9                                                <NA>   \n",
-              "10                                               <NA>   \n",
-              "11                                               <NA>   \n",
-              "12                                               <NA>   \n",
-              "13                                               <NA>   \n",
-              "14                                               <NA>   \n",
-              "15                        Discord vs. IRC Rough Notes   \n",
-              "16                                               <NA>   \n",
-              "17                                               <NA>   \n",
-              "18                                               <NA>   \n",
-              "19                                               <NA>   \n",
-              "20                                               <NA>   \n",
-              "21  Oh dear: new Yahoo anti-spoofing measures brea...   \n",
-              "22             How Much Warmer Was Your City in 2016?   \n",
-              "23                                               <NA>   \n",
-              "24                       Working Best at Coffee Shops   \n",
-              "\n",
-              "                                                 text              by  score  \\\n",
-              "0   Well, most people aren&#x27;t alcoholics, so I...       slipframe   <NA>   \n",
-              "1   No, you don&#x27;t really <i>need</i> a smartp...        vetinari   <NA>   \n",
-              "2   It&#x27;s for the late Paul Allen RIP. Should&...        lsr_ssri   <NA>   \n",
-              "3    Yup they are dangerous. Be careful Donald Trump.           Sven7   <NA>   \n",
-              "4   Sure, it&#x27;s totally reasonable. Just point...       nicoburns   <NA>   \n",
-              "5   I wonder how long before special forces start ...   autisticcurio   <NA>   \n",
-              "6                                                <NA>         gaoprea      3   \n",
-              "7   Where would you relocate to? I'm assuming that...    pavel_lishin   <NA>   \n",
-              "8                                                <NA>        ricardos      1   \n",
-              "9   It doesn’t work on Safari, and WebKit based br...      archiewood   <NA>   \n",
-              "10  I guess I don’t see the relevance. Vegans eat ...         stevula   <NA>   \n",
-              "11  I remember watching the American news media go...         fareesh   <NA>   \n",
-              "12  This article is incorrectly using the current ...       stale2002   <NA>   \n",
-              "13  In the firm I made my internship, we have to u...    iserlohnmage   <NA>   \n",
-              "14  The main reason it requires unsafe is for memo...           comex   <NA>   \n",
-              "15                                               <NA>    todsacerdoti     48   \n",
-              "16     you have to auth again when you use apple pay.        empath75   <NA>   \n",
-              "17  It goes consumer grade, automotive, military, ...           moftz   <NA>   \n",
-              "18  I don&#x27;t have a link handy but the differe...    KennyBlanken   <NA>   \n",
-              "19  &gt; I don&#x27;t think the use case you menti...     colanderman   <NA>   \n",
-              "20  I think you need to watch it again, because yo...   vladimirralev   <NA>   \n",
-              "21                                               <NA>       joshreads      1   \n",
-              "22                                               <NA>           smb06      1   \n",
-              "23  Except that they clearly never tried to incent...           aenis   <NA>   \n",
-              "24                                               <NA>  GiraffeNecktie    249   \n",
-              "\n",
-              "                    timestamp     type  \n",
-              "0   2021-06-26 02:37:56+00:00  comment  \n",
-              "1   2023-04-19 15:56:34+00:00  comment  \n",
-              "2   2018-10-16 01:07:55+00:00  comment  \n",
-              "3   2015-08-10 16:05:54+00:00  comment  \n",
-              "4   2020-10-05 11:20:51+00:00  comment  \n",
-              "5   2020-09-01 15:38:50+00:00  comment  \n",
-              "6   2011-09-27 22:43:27+00:00    story  \n",
-              "7   2011-09-16 19:02:01+00:00  comment  \n",
-              "8   2012-10-15 13:09:32+00:00    story  \n",
-              "9   2023-04-21 16:45:13+00:00  comment  \n",
-              "10  2023-01-19 20:05:54+00:00  comment  \n",
-              "11  2019-06-17 19:49:17+00:00  comment  \n",
-              "12  2018-03-18 18:57:21+00:00  comment  \n",
-              "13  2019-10-22 10:41:01+00:00  comment  \n",
-              "14  2017-05-05 20:45:37+00:00  comment  \n",
-              "15  2024-07-12 18:39:52+00:00    story  \n",
-              "16  2017-09-12 18:58:20+00:00  comment  \n",
-              "17  2021-04-13 01:24:03+00:00  comment  \n",
-              "18  2022-05-13 16:08:38+00:00  comment  \n",
-              "19  2017-09-28 05:16:06+00:00  comment  \n",
-              "20  2018-12-07 11:25:52+00:00  comment  \n",
-              "21  2014-04-08 13:29:50+00:00    story  \n",
-              "22  2017-02-16 23:26:34+00:00    story  \n",
-              "23  2022-01-31 17:08:57+00:00  comment  \n",
-              "24  2011-04-19 14:25:17+00:00    story  \n",
-              "...\n",
-              "\n",
-              "[3000 rows x 6 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -2031,10 +1951,96 @@
               "</table>\n",
               "<p>25 rows × 6 columns</p>\n",
               "</div>[3000 rows x 6 columns in total]"
+            ],
+            "text/plain": [
+              "                                                title  \\\n",
+              "0                                                <NA>   \n",
+              "1                                                <NA>   \n",
+              "2                                                <NA>   \n",
+              "3                                                <NA>   \n",
+              "4                                                <NA>   \n",
+              "5                                                <NA>   \n",
+              "6   The Impending NY Tech Apocalypse: Here's What ...   \n",
+              "7                                                <NA>   \n",
+              "8   Eureca beta is live. A place for your business...   \n",
+              "9                                                <NA>   \n",
+              "10                                               <NA>   \n",
+              "11                                               <NA>   \n",
+              "12                                               <NA>   \n",
+              "13                                               <NA>   \n",
+              "14                                               <NA>   \n",
+              "15                        Discord vs. IRC Rough Notes   \n",
+              "16                                               <NA>   \n",
+              "17                                               <NA>   \n",
+              "18                                               <NA>   \n",
+              "19                                               <NA>   \n",
+              "20                                               <NA>   \n",
+              "21  Oh dear: new Yahoo anti-spoofing measures brea...   \n",
+              "22             How Much Warmer Was Your City in 2016?   \n",
+              "23                                               <NA>   \n",
+              "24                       Working Best at Coffee Shops   \n",
+              "\n",
+              "                                                 text              by  score  \\\n",
+              "0   Well, most people aren&#x27;t alcoholics, so I...       slipframe   <NA>   \n",
+              "1   No, you don&#x27;t really <i>need</i> a smartp...        vetinari   <NA>   \n",
+              "2   It&#x27;s for the late Paul Allen RIP. Should&...        lsr_ssri   <NA>   \n",
+              "3    Yup they are dangerous. Be careful Donald Trump.           Sven7   <NA>   \n",
+              "4   Sure, it&#x27;s totally reasonable. Just point...       nicoburns   <NA>   \n",
+              "5   I wonder how long before special forces start ...   autisticcurio   <NA>   \n",
+              "6                                                <NA>         gaoprea      3   \n",
+              "7   Where would you relocate to? I'm assuming that...    pavel_lishin   <NA>   \n",
+              "8                                                <NA>        ricardos      1   \n",
+              "9   It doesn’t work on Safari, and WebKit based br...      archiewood   <NA>   \n",
+              "10  I guess I don’t see the relevance. Vegans eat ...         stevula   <NA>   \n",
+              "11  I remember watching the American news media go...         fareesh   <NA>   \n",
+              "12  This article is incorrectly using the current ...       stale2002   <NA>   \n",
+              "13  In the firm I made my internship, we have to u...    iserlohnmage   <NA>   \n",
+              "14  The main reason it requires unsafe is for memo...           comex   <NA>   \n",
+              "15                                               <NA>    todsacerdoti     48   \n",
+              "16     you have to auth again when you use apple pay.        empath75   <NA>   \n",
+              "17  It goes consumer grade, automotive, military, ...           moftz   <NA>   \n",
+              "18  I don&#x27;t have a link handy but the differe...    KennyBlanken   <NA>   \n",
+              "19  &gt; I don&#x27;t think the use case you menti...     colanderman   <NA>   \n",
+              "20  I think you need to watch it again, because yo...   vladimirralev   <NA>   \n",
+              "21                                               <NA>       joshreads      1   \n",
+              "22                                               <NA>           smb06      1   \n",
+              "23  Except that they clearly never tried to incent...           aenis   <NA>   \n",
+              "24                                               <NA>  GiraffeNecktie    249   \n",
+              "\n",
+              "                    timestamp     type  \n",
+              "0   2021-06-26 02:37:56+00:00  comment  \n",
+              "1   2023-04-19 15:56:34+00:00  comment  \n",
+              "2   2018-10-16 01:07:55+00:00  comment  \n",
+              "3   2015-08-10 16:05:54+00:00  comment  \n",
+              "4   2020-10-05 11:20:51+00:00  comment  \n",
+              "5   2020-09-01 15:38:50+00:00  comment  \n",
+              "6   2011-09-27 22:43:27+00:00    story  \n",
+              "7   2011-09-16 19:02:01+00:00  comment  \n",
+              "8   2012-10-15 13:09:32+00:00    story  \n",
+              "9   2023-04-21 16:45:13+00:00  comment  \n",
+              "10  2023-01-19 20:05:54+00:00  comment  \n",
+              "11  2019-06-17 19:49:17+00:00  comment  \n",
+              "12  2018-03-18 18:57:21+00:00  comment  \n",
+              "13  2019-10-22 10:41:01+00:00  comment  \n",
+              "14  2017-05-05 20:45:37+00:00  comment  \n",
+              "15  2024-07-12 18:39:52+00:00    story  \n",
+              "16  2017-09-12 18:58:20+00:00  comment  \n",
+              "17  2021-04-13 01:24:03+00:00  comment  \n",
+              "18  2022-05-13 16:08:38+00:00  comment  \n",
+              "19  2017-09-28 05:16:06+00:00  comment  \n",
+              "20  2018-12-07 11:25:52+00:00  comment  \n",
+              "21  2014-04-08 13:29:50+00:00    story  \n",
+              "22  2017-02-16 23:26:34+00:00    story  \n",
+              "23  2022-01-31 17:08:57+00:00  comment  \n",
+              "24  2011-04-19 14:25:17+00:00    story  \n",
+              "...\n",
+              "\n",
+              "[3000 rows x 6 columns]"
             ]
           },
+          "execution_count": 9,
           "metadata": {},
-          "execution_count": 9
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -2063,14 +2069,14 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
             "text/plain": [
               "2556"
             ]
           },
+          "execution_count": 10,
           "metadata": {},
-          "execution_count": 10
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -2099,14 +2105,14 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
             "text/plain": [
               "390.05125195618155"
             ]
           },
+          "execution_count": 11,
           "metadata": {},
-          "execution_count": 11
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -2156,25 +2162,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "     title                                               text              by  \\\n",
-              "9     <NA>  It doesn’t work on Safari, and WebKit based br...      archiewood   \n",
-              "420   <NA>  Well last time I got angry down votes for sayi...       drieddust   \n",
-              "815   <NA>  New iPhone should be announced on September. L...         meerita   \n",
-              "1516  <NA>  Why would this take a week? i(phone)OS was ori...  TheOtherHobbes   \n",
-              "1563  <NA>  &gt;or because Apple drama brings many clicks?...         weberer   \n",
-              "\n",
-              "      score                  timestamp     type  \n",
-              "9      <NA>  2023-04-21 16:45:13+00:00  comment  \n",
-              "420    <NA>  2021-01-11 19:27:27+00:00  comment  \n",
-              "815    <NA>  2019-07-30 20:54:42+00:00  comment  \n",
-              "1516   <NA>  2021-06-08 09:25:24+00:00  comment  \n",
-              "1563   <NA>  2022-09-05 13:16:02+00:00  comment  \n",
-              "\n",
-              "[5 rows x 6 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -2252,10 +2240,28 @@
               "</table>\n",
               "<p>5 rows × 6 columns</p>\n",
               "</div>[5 rows x 6 columns in total]"
+            ],
+            "text/plain": [
+              "     title                                               text              by  \\\n",
+              "9     <NA>  It doesn’t work on Safari, and WebKit based br...      archiewood   \n",
+              "420   <NA>  Well last time I got angry down votes for sayi...       drieddust   \n",
+              "815   <NA>  New iPhone should be announced on September. L...         meerita   \n",
+              "1516  <NA>  Why would this take a week? i(phone)OS was ori...  TheOtherHobbes   \n",
+              "1563  <NA>  &gt;or because Apple drama brings many clicks?...         weberer   \n",
+              "\n",
+              "      score                  timestamp     type  \n",
+              "9      <NA>  2023-04-21 16:45:13+00:00  comment  \n",
+              "420    <NA>  2021-01-11 19:27:27+00:00  comment  \n",
+              "815    <NA>  2019-07-30 20:54:42+00:00  comment  \n",
+              "1516   <NA>  2021-06-08 09:25:24+00:00  comment  \n",
+              "1563   <NA>  2022-09-05 13:16:02+00:00  comment  \n",
+              "\n",
+              "[5 rows x 6 columns]"
             ]
           },
+          "execution_count": 13,
           "metadata": {},
-          "execution_count": 13
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -2300,37 +2306,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "     title                                               text              by  \\\n",
-              "9     <NA>  It doesn’t work on Safari, and WebKit based br...      archiewood   \n",
-              "420   <NA>  Well last time I got angry down votes for sayi...       drieddust   \n",
-              "815   <NA>  New iPhone should be announced on September. L...         meerita   \n",
-              "1516  <NA>  Why would this take a week? i(phone)OS was ori...  TheOtherHobbes   \n",
-              "1563  <NA>  &gt;or because Apple drama brings many clicks?...         weberer   \n",
-              "\n",
-              "      score                  timestamp     type  \\\n",
-              "9      <NA>  2023-04-21 16:45:13+00:00  comment   \n",
-              "420    <NA>  2021-01-11 19:27:27+00:00  comment   \n",
-              "815    <NA>  2019-07-30 20:54:42+00:00  comment   \n",
-              "1516   <NA>  2021-06-08 09:25:24+00:00  comment   \n",
-              "1563   <NA>  2022-09-05 13:16:02+00:00  comment   \n",
-              "\n",
-              "                             sentiment  \n",
-              "9           Frustrated, but hopeful. \n",
-              "  \n",
-              "420            Frustrated and angry. \n",
-              "  \n",
-              "815            Excited anticipation. \n",
-              "  \n",
-              "1516  Frustrated, critical, obvious. \n",
-              "  \n",
-              "1563     Negative, clickbait, Apple. \n",
-              "  \n",
-              "\n",
-              "[5 rows x 7 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -2414,10 +2390,40 @@
               "</table>\n",
               "<p>5 rows × 7 columns</p>\n",
               "</div>[5 rows x 7 columns in total]"
+            ],
+            "text/plain": [
+              "     title                                               text              by  \\\n",
+              "9     <NA>  It doesn’t work on Safari, and WebKit based br...      archiewood   \n",
+              "420   <NA>  Well last time I got angry down votes for sayi...       drieddust   \n",
+              "815   <NA>  New iPhone should be announced on September. L...         meerita   \n",
+              "1516  <NA>  Why would this take a week? i(phone)OS was ori...  TheOtherHobbes   \n",
+              "1563  <NA>  &gt;or because Apple drama brings many clicks?...         weberer   \n",
+              "\n",
+              "      score                  timestamp     type  \\\n",
+              "9      <NA>  2023-04-21 16:45:13+00:00  comment   \n",
+              "420    <NA>  2021-01-11 19:27:27+00:00  comment   \n",
+              "815    <NA>  2019-07-30 20:54:42+00:00  comment   \n",
+              "1516   <NA>  2021-06-08 09:25:24+00:00  comment   \n",
+              "1563   <NA>  2022-09-05 13:16:02+00:00  comment   \n",
+              "\n",
+              "                             sentiment  \n",
+              "9           Frustrated, but hopeful. \n",
+              "  \n",
+              "420            Frustrated and angry. \n",
+              "  \n",
+              "815            Excited anticipation. \n",
+              "  \n",
+              "1516  Frustrated, critical, obvious. \n",
+              "  \n",
+              "1563     Negative, clickbait, Apple. \n",
+              "  \n",
+              "\n",
+              "[5 rows x 7 columns]"
             ]
           },
+          "execution_count": 14,
           "metadata": {},
-          "execution_count": 14
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -2446,93 +2452,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "                                                title  \\\n",
-              "0                                                <NA>   \n",
-              "1                                                <NA>   \n",
-              "2                                                <NA>   \n",
-              "3                                                <NA>   \n",
-              "4                                                <NA>   \n",
-              "5                                                <NA>   \n",
-              "6   The Impending NY Tech Apocalypse: Here's What ...   \n",
-              "7                                                <NA>   \n",
-              "8   Eureca beta is live. A place for your business...   \n",
-              "9                                                <NA>   \n",
-              "10                                               <NA>   \n",
-              "11                                               <NA>   \n",
-              "12                                               <NA>   \n",
-              "13                                               <NA>   \n",
-              "14                                               <NA>   \n",
-              "15                        Discord vs. IRC Rough Notes   \n",
-              "16                                               <NA>   \n",
-              "17                                               <NA>   \n",
-              "18                                               <NA>   \n",
-              "19                                               <NA>   \n",
-              "20                                               <NA>   \n",
-              "21  Oh dear: new Yahoo anti-spoofing measures brea...   \n",
-              "22             How Much Warmer Was Your City in 2016?   \n",
-              "23                                               <NA>   \n",
-              "24                       Working Best at Coffee Shops   \n",
-              "\n",
-              "                                                 text              by  score  \\\n",
-              "0   Well, most people aren&#x27;t alcoholics, so I...       slipframe   <NA>   \n",
-              "1   No, you don&#x27;t really <i>need</i> a smartp...        vetinari   <NA>   \n",
-              "2   It&#x27;s for the late Paul Allen RIP. Should&...        lsr_ssri   <NA>   \n",
-              "3    Yup they are dangerous. Be careful Donald Trump.           Sven7   <NA>   \n",
-              "4   Sure, it&#x27;s totally reasonable. Just point...       nicoburns   <NA>   \n",
-              "5   I wonder how long before special forces start ...   autisticcurio   <NA>   \n",
-              "6                                                <NA>         gaoprea      3   \n",
-              "7   Where would you relocate to? I'm assuming that...    pavel_lishin   <NA>   \n",
-              "8                                                <NA>        ricardos      1   \n",
-              "9   It doesn’t work on Safari, and WebKit based br...      archiewood   <NA>   \n",
-              "10  I guess I don’t see the relevance. Vegans eat ...         stevula   <NA>   \n",
-              "11  I remember watching the American news media go...         fareesh   <NA>   \n",
-              "12  This article is incorrectly using the current ...       stale2002   <NA>   \n",
-              "13  In the firm I made my internship, we have to u...    iserlohnmage   <NA>   \n",
-              "14  The main reason it requires unsafe is for memo...           comex   <NA>   \n",
-              "15                                               <NA>    todsacerdoti     48   \n",
-              "16     you have to auth again when you use apple pay.        empath75   <NA>   \n",
-              "17  It goes consumer grade, automotive, military, ...           moftz   <NA>   \n",
-              "18  I don&#x27;t have a link handy but the differe...    KennyBlanken   <NA>   \n",
-              "19  &gt; I don&#x27;t think the use case you menti...     colanderman   <NA>   \n",
-              "20  I think you need to watch it again, because yo...   vladimirralev   <NA>   \n",
-              "21                                               <NA>       joshreads      1   \n",
-              "22                                               <NA>           smb06      1   \n",
-              "23  Except that they clearly never tried to incent...           aenis   <NA>   \n",
-              "24                                               <NA>  GiraffeNecktie    249   \n",
-              "\n",
-              "                    timestamp     type  \n",
-              "0   2021-06-26 02:37:56+00:00  comment  \n",
-              "1   2023-04-19 15:56:34+00:00  comment  \n",
-              "2   2018-10-16 01:07:55+00:00  comment  \n",
-              "3   2015-08-10 16:05:54+00:00  comment  \n",
-              "4   2020-10-05 11:20:51+00:00  comment  \n",
-              "5   2020-09-01 15:38:50+00:00  comment  \n",
-              "6   2011-09-27 22:43:27+00:00    story  \n",
-              "7   2011-09-16 19:02:01+00:00  comment  \n",
-              "8   2012-10-15 13:09:32+00:00    story  \n",
-              "9   2023-04-21 16:45:13+00:00  comment  \n",
-              "10  2023-01-19 20:05:54+00:00  comment  \n",
-              "11  2019-06-17 19:49:17+00:00  comment  \n",
-              "12  2018-03-18 18:57:21+00:00  comment  \n",
-              "13  2019-10-22 10:41:01+00:00  comment  \n",
-              "14  2017-05-05 20:45:37+00:00  comment  \n",
-              "15  2024-07-12 18:39:52+00:00    story  \n",
-              "16  2017-09-12 18:58:20+00:00  comment  \n",
-              "17  2021-04-13 01:24:03+00:00  comment  \n",
-              "18  2022-05-13 16:08:38+00:00  comment  \n",
-              "19  2017-09-28 05:16:06+00:00  comment  \n",
-              "20  2018-12-07 11:25:52+00:00  comment  \n",
-              "21  2014-04-08 13:29:50+00:00    story  \n",
-              "22  2017-02-16 23:26:34+00:00    story  \n",
-              "23  2022-01-31 17:08:57+00:00  comment  \n",
-              "24  2011-04-19 14:25:17+00:00    story  \n",
-              "...\n",
-              "\n",
-              "[3000 rows x 6 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -2790,10 +2710,96 @@
               "</table>\n",
               "<p>25 rows × 6 columns</p>\n",
               "</div>[3000 rows x 6 columns in total]"
+            ],
+            "text/plain": [
+              "                                                title  \\\n",
+              "0                                                <NA>   \n",
+              "1                                                <NA>   \n",
+              "2                                                <NA>   \n",
+              "3                                                <NA>   \n",
+              "4                                                <NA>   \n",
+              "5                                                <NA>   \n",
+              "6   The Impending NY Tech Apocalypse: Here's What ...   \n",
+              "7                                                <NA>   \n",
+              "8   Eureca beta is live. A place for your business...   \n",
+              "9                                                <NA>   \n",
+              "10                                               <NA>   \n",
+              "11                                               <NA>   \n",
+              "12                                               <NA>   \n",
+              "13                                               <NA>   \n",
+              "14                                               <NA>   \n",
+              "15                        Discord vs. IRC Rough Notes   \n",
+              "16                                               <NA>   \n",
+              "17                                               <NA>   \n",
+              "18                                               <NA>   \n",
+              "19                                               <NA>   \n",
+              "20                                               <NA>   \n",
+              "21  Oh dear: new Yahoo anti-spoofing measures brea...   \n",
+              "22             How Much Warmer Was Your City in 2016?   \n",
+              "23                                               <NA>   \n",
+              "24                       Working Best at Coffee Shops   \n",
+              "\n",
+              "                                                 text              by  score  \\\n",
+              "0   Well, most people aren&#x27;t alcoholics, so I...       slipframe   <NA>   \n",
+              "1   No, you don&#x27;t really <i>need</i> a smartp...        vetinari   <NA>   \n",
+              "2   It&#x27;s for the late Paul Allen RIP. Should&...        lsr_ssri   <NA>   \n",
+              "3    Yup they are dangerous. Be careful Donald Trump.           Sven7   <NA>   \n",
+              "4   Sure, it&#x27;s totally reasonable. Just point...       nicoburns   <NA>   \n",
+              "5   I wonder how long before special forces start ...   autisticcurio   <NA>   \n",
+              "6                                                <NA>         gaoprea      3   \n",
+              "7   Where would you relocate to? I'm assuming that...    pavel_lishin   <NA>   \n",
+              "8                                                <NA>        ricardos      1   \n",
+              "9   It doesn’t work on Safari, and WebKit based br...      archiewood   <NA>   \n",
+              "10  I guess I don’t see the relevance. Vegans eat ...         stevula   <NA>   \n",
+              "11  I remember watching the American news media go...         fareesh   <NA>   \n",
+              "12  This article is incorrectly using the current ...       stale2002   <NA>   \n",
+              "13  In the firm I made my internship, we have to u...    iserlohnmage   <NA>   \n",
+              "14  The main reason it requires unsafe is for memo...           comex   <NA>   \n",
+              "15                                               <NA>    todsacerdoti     48   \n",
+              "16     you have to auth again when you use apple pay.        empath75   <NA>   \n",
+              "17  It goes consumer grade, automotive, military, ...           moftz   <NA>   \n",
+              "18  I don&#x27;t have a link handy but the differe...    KennyBlanken   <NA>   \n",
+              "19  &gt; I don&#x27;t think the use case you menti...     colanderman   <NA>   \n",
+              "20  I think you need to watch it again, because yo...   vladimirralev   <NA>   \n",
+              "21                                               <NA>       joshreads      1   \n",
+              "22                                               <NA>           smb06      1   \n",
+              "23  Except that they clearly never tried to incent...           aenis   <NA>   \n",
+              "24                                               <NA>  GiraffeNecktie    249   \n",
+              "\n",
+              "                    timestamp     type  \n",
+              "0   2021-06-26 02:37:56+00:00  comment  \n",
+              "1   2023-04-19 15:56:34+00:00  comment  \n",
+              "2   2018-10-16 01:07:55+00:00  comment  \n",
+              "3   2015-08-10 16:05:54+00:00  comment  \n",
+              "4   2020-10-05 11:20:51+00:00  comment  \n",
+              "5   2020-09-01 15:38:50+00:00  comment  \n",
+              "6   2011-09-27 22:43:27+00:00    story  \n",
+              "7   2011-09-16 19:02:01+00:00  comment  \n",
+              "8   2012-10-15 13:09:32+00:00    story  \n",
+              "9   2023-04-21 16:45:13+00:00  comment  \n",
+              "10  2023-01-19 20:05:54+00:00  comment  \n",
+              "11  2019-06-17 19:49:17+00:00  comment  \n",
+              "12  2018-03-18 18:57:21+00:00  comment  \n",
+              "13  2019-10-22 10:41:01+00:00  comment  \n",
+              "14  2017-05-05 20:45:37+00:00  comment  \n",
+              "15  2024-07-12 18:39:52+00:00    story  \n",
+              "16  2017-09-12 18:58:20+00:00  comment  \n",
+              "17  2021-04-13 01:24:03+00:00  comment  \n",
+              "18  2022-05-13 16:08:38+00:00  comment  \n",
+              "19  2017-09-28 05:16:06+00:00  comment  \n",
+              "20  2018-12-07 11:25:52+00:00  comment  \n",
+              "21  2014-04-08 13:29:50+00:00    story  \n",
+              "22  2017-02-16 23:26:34+00:00    story  \n",
+              "23  2022-01-31 17:08:57+00:00  comment  \n",
+              "24  2011-04-19 14:25:17+00:00    story  \n",
+              "...\n",
+              "\n",
+              "[3000 rows x 6 columns]"
             ]
           },
+          "execution_count": 7,
           "metadata": {},
-          "execution_count": 7
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -2814,93 +2820,7 @@
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "                                               title  \\\n",
-              "24                      Working Best at Coffee Shops   \n",
-              "98                                              <NA>   \n",
-              "137  FDA reverses marketing ban on Juul e-cigarettes   \n",
-              "188                                             <NA>   \n",
-              "209                                             <NA>   \n",
-              "228                                             <NA>   \n",
-              "290                                             <NA>   \n",
-              "303                                             <NA>   \n",
-              "312                                             <NA>   \n",
-              "322                                             <NA>   \n",
-              "391                                             <NA>   \n",
-              "396                                             <NA>   \n",
-              "424                                             <NA>   \n",
-              "428                                             <NA>   \n",
-              "429                                             <NA>   \n",
-              "436                                             <NA>   \n",
-              "438                                             <NA>   \n",
-              "446                                             <NA>   \n",
-              "453                                             <NA>   \n",
-              "507                                             <NA>   \n",
-              "543                                             <NA>   \n",
-              "565                                             <NA>   \n",
-              "612                                             <NA>   \n",
-              "660                                             <NA>   \n",
-              "673                                             <NA>   \n",
-              "\n",
-              "                                                  text               by  \\\n",
-              "24                                                <NA>   GiraffeNecktie   \n",
-              "98   i resisted switching to chrome for months beca...         catshirt   \n",
-              "137                                               <NA>        anigbrowl   \n",
-              "188  I think it&#x27;s more than hazing. It may be ...    bayesianhorse   \n",
-              "209  I like the idea of moving that arrow the way h...          rattray   \n",
-              "228  I don&#x27;t understand why a beginner would s...            wolco   \n",
-              "290  I leaerned more with one minute of this than a...        agumonkey   \n",
-              "303  I've suggested a <i>rationale</i> for the tabo...  mechanical_fish   \n",
-              "312  Do you have any reference for this?<p>I&#x27;m...        banashark   \n",
-              "322  Default search scope is an option in the Finde...      kitsunesoba   \n",
-              "391     Orthogonality and biology aren&#x27;t friends.        agumonkey   \n",
-              "396  I chose some random physics book that was good...            prawn   \n",
-              "424  Seeing this get huge on Twitter. It&#x27;s the...      shenanigoat   \n",
-              "428  Looking through the comments there are a numbe...           moomin   \n",
-              "429  Legacy media is a tough business. GBTC is payi...       arcticbull   \n",
-              "436  Same thing if you sell unsafe food, yet we hav...      jabradoodle   \n",
-              "438  There was briefly a thing called HSCSD (&quot;...      LeoPanthera   \n",
-              "446  &gt; This article is a bit comical to read and...           lapcat   \n",
-              "453  Large positions are most likely sold off in sm...          meowkit   \n",
-              "507  A US-based VPN (or really any VPN) is only goi...      RandomBacon   \n",
-              "543  <a href=\"https:&#x2F;&#x2F;codeberg.org&#x2F;A...   ElectronBadger   \n",
-              "565  It’s much harder for people without hands to w...           Aeolun   \n",
-              "612  So by using ADMIN_SL0T instead was it just set...         minitoar   \n",
-              "660                                       Outstanding!           cafard   \n",
-              "673  On the other hand, something can be said for &...            babby   \n",
-              "\n",
-              "     score                  timestamp     type  \n",
-              "24     249  2011-04-19 14:25:17+00:00    story  \n",
-              "98    <NA>  2011-04-06 08:02:24+00:00  comment  \n",
-              "137      2  2024-06-06 16:42:40+00:00    story  \n",
-              "188   <NA>  2015-06-18 16:42:53+00:00  comment  \n",
-              "209   <NA>  2015-06-08 02:15:30+00:00  comment  \n",
-              "228   <NA>  2019-02-03 14:35:43+00:00  comment  \n",
-              "290   <NA>  2016-07-16 06:19:39+00:00  comment  \n",
-              "303   <NA>  2008-12-17 04:42:02+00:00  comment  \n",
-              "312   <NA>  2023-11-13 19:57:00+00:00  comment  \n",
-              "322   <NA>  2017-08-13 17:15:19+00:00  comment  \n",
-              "391   <NA>  2016-04-24 16:33:41+00:00  comment  \n",
-              "396   <NA>  2011-03-27 22:29:51+00:00  comment  \n",
-              "424   <NA>  2016-01-09 03:04:22+00:00  comment  \n",
-              "428   <NA>  2024-10-01 14:37:04+00:00  comment  \n",
-              "429   <NA>  2021-04-16 16:30:33+00:00  comment  \n",
-              "436   <NA>  2023-08-03 20:47:52+00:00  comment  \n",
-              "438   <NA>  2019-02-11 19:49:29+00:00  comment  \n",
-              "446   <NA>  2023-01-02 16:00:49+00:00  comment  \n",
-              "453   <NA>  2021-01-27 23:22:48+00:00  comment  \n",
-              "507   <NA>  2019-04-05 00:58:58+00:00  comment  \n",
-              "543   <NA>  2023-12-13 08:13:15+00:00  comment  \n",
-              "565   <NA>  2024-05-03 11:58:13+00:00  comment  \n",
-              "612   <NA>  2021-03-05 16:07:56+00:00  comment  \n",
-              "660   <NA>  2022-06-09 09:51:54+00:00  comment  \n",
-              "673   <NA>  2013-08-12 00:31:02+00:00  comment  \n",
-              "...\n",
-              "\n",
-              "[123 rows x 6 columns]"
-            ],
             "text/html": [
               "<div>\n",
               "<style scoped>\n",
@@ -3158,10 +3078,96 @@
               "</table>\n",
               "<p>25 rows × 6 columns</p>\n",
               "</div>[123 rows x 6 columns in total]"
+            ],
+            "text/plain": [
+              "                                               title  \\\n",
+              "24                      Working Best at Coffee Shops   \n",
+              "98                                              <NA>   \n",
+              "137  FDA reverses marketing ban on Juul e-cigarettes   \n",
+              "188                                             <NA>   \n",
+              "209                                             <NA>   \n",
+              "228                                             <NA>   \n",
+              "290                                             <NA>   \n",
+              "303                                             <NA>   \n",
+              "312                                             <NA>   \n",
+              "322                                             <NA>   \n",
+              "391                                             <NA>   \n",
+              "396                                             <NA>   \n",
+              "424                                             <NA>   \n",
+              "428                                             <NA>   \n",
+              "429                                             <NA>   \n",
+              "436                                             <NA>   \n",
+              "438                                             <NA>   \n",
+              "446                                             <NA>   \n",
+              "453                                             <NA>   \n",
+              "507                                             <NA>   \n",
+              "543                                             <NA>   \n",
+              "565                                             <NA>   \n",
+              "612                                             <NA>   \n",
+              "660                                             <NA>   \n",
+              "673                                             <NA>   \n",
+              "\n",
+              "                                                  text               by  \\\n",
+              "24                                                <NA>   GiraffeNecktie   \n",
+              "98   i resisted switching to chrome for months beca...         catshirt   \n",
+              "137                                               <NA>        anigbrowl   \n",
+              "188  I think it&#x27;s more than hazing. It may be ...    bayesianhorse   \n",
+              "209  I like the idea of moving that arrow the way h...          rattray   \n",
+              "228  I don&#x27;t understand why a beginner would s...            wolco   \n",
+              "290  I leaerned more with one minute of this than a...        agumonkey   \n",
+              "303  I've suggested a <i>rationale</i> for the tabo...  mechanical_fish   \n",
+              "312  Do you have any reference for this?<p>I&#x27;m...        banashark   \n",
+              "322  Default search scope is an option in the Finde...      kitsunesoba   \n",
+              "391     Orthogonality and biology aren&#x27;t friends.        agumonkey   \n",
+              "396  I chose some random physics book that was good...            prawn   \n",
+              "424  Seeing this get huge on Twitter. It&#x27;s the...      shenanigoat   \n",
+              "428  Looking through the comments there are a numbe...           moomin   \n",
+              "429  Legacy media is a tough business. GBTC is payi...       arcticbull   \n",
+              "436  Same thing if you sell unsafe food, yet we hav...      jabradoodle   \n",
+              "438  There was briefly a thing called HSCSD (&quot;...      LeoPanthera   \n",
+              "446  &gt; This article is a bit comical to read and...           lapcat   \n",
+              "453  Large positions are most likely sold off in sm...          meowkit   \n",
+              "507  A US-based VPN (or really any VPN) is only goi...      RandomBacon   \n",
+              "543  <a href=\"https:&#x2F;&#x2F;codeberg.org&#x2F;A...   ElectronBadger   \n",
+              "565  It’s much harder for people without hands to w...           Aeolun   \n",
+              "612  So by using ADMIN_SL0T instead was it just set...         minitoar   \n",
+              "660                                       Outstanding!           cafard   \n",
+              "673  On the other hand, something can be said for &...            babby   \n",
+              "\n",
+              "     score                  timestamp     type  \n",
+              "24     249  2011-04-19 14:25:17+00:00    story  \n",
+              "98    <NA>  2011-04-06 08:02:24+00:00  comment  \n",
+              "137      2  2024-06-06 16:42:40+00:00    story  \n",
+              "188   <NA>  2015-06-18 16:42:53+00:00  comment  \n",
+              "209   <NA>  2015-06-08 02:15:30+00:00  comment  \n",
+              "228   <NA>  2019-02-03 14:35:43+00:00  comment  \n",
+              "290   <NA>  2016-07-16 06:19:39+00:00  comment  \n",
+              "303   <NA>  2008-12-17 04:42:02+00:00  comment  \n",
+              "312   <NA>  2023-11-13 19:57:00+00:00  comment  \n",
+              "322   <NA>  2017-08-13 17:15:19+00:00  comment  \n",
+              "391   <NA>  2016-04-24 16:33:41+00:00  comment  \n",
+              "396   <NA>  2011-03-27 22:29:51+00:00  comment  \n",
+              "424   <NA>  2016-01-09 03:04:22+00:00  comment  \n",
+              "428   <NA>  2024-10-01 14:37:04+00:00  comment  \n",
+              "429   <NA>  2021-04-16 16:30:33+00:00  comment  \n",
+              "436   <NA>  2023-08-03 20:47:52+00:00  comment  \n",
+              "438   <NA>  2019-02-11 19:49:29+00:00  comment  \n",
+              "446   <NA>  2023-01-02 16:00:49+00:00  comment  \n",
+              "453   <NA>  2021-01-27 23:22:48+00:00  comment  \n",
+              "507   <NA>  2019-04-05 00:58:58+00:00  comment  \n",
+              "543   <NA>  2023-12-13 08:13:15+00:00  comment  \n",
+              "565   <NA>  2024-05-03 11:58:13+00:00  comment  \n",
+              "612   <NA>  2021-03-05 16:07:56+00:00  comment  \n",
+              "660   <NA>  2022-06-09 09:51:54+00:00  comment  \n",
+              "673   <NA>  2013-08-12 00:31:02+00:00  comment  \n",
+              "...\n",
+              "\n",
+              "[123 rows x 6 columns]"
             ]
           },
+          "execution_count": 13,
           "metadata": {},
-          "execution_count": 13
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -3181,6 +3187,10 @@
     }
   ],
   "metadata": {
+    "colab": {
+      "include_colab_link": true,
+      "provenance": []
+    },
     "kernelspec": {
       "display_name": "venv",
       "language": "python",
@@ -3196,11 +3206,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.11.9"
-    },
-    "colab": {
-      "provenance": [],
-      "include_colab_link": true
+      "version": "3.10.15"
     }
   },
   "nbformat": 4,
diff --git a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb
index 09e3d9c969..db51afd412 100644
--- a/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb
+++ b/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb
@@ -50,6 +50,12 @@
         "      Open in Vertex AI Workbench\n",
         "    </a>\n",
         "  </td>\n",
+        "  <td>\n",
+        "    <a href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb\">\n",
+        "      <img src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s\" alt=\"BQ logo\" width=\"35\">\n",
+        "      Open in BQ Studio\n",
+        "    </a>\n",
+        "  </td>\n",
         "</table>"
       ]
     },
diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
index 7307bb62e5..254ac65358 100644
--- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
+++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
@@ -47,6 +47,12 @@
     "      Open in Vertex AI Workbench\n",
     "    </a>\n",
     "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb\">\n",
+    "      <img src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s\" alt=\"BQ logo\" width=\"35\">\n",
+    "      Open in BQ Studio\n",
+    "    </a>\n",
+    "  </td>\n",
     "</table>"
    ]
   },
diff --git a/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb b/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb
index d7419deee0..a77f3f11eb 100644
--- a/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb
+++ b/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb
@@ -49,6 +49,12 @@
         "      Open in Vertex AI Workbench\n",
         "    </a>\n",
         "  </td>\n",
+        "  <td>\n",
+        "    <a href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb\">\n",
+        "      <img src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s\" alt=\"BQ logo\" width=\"35\">\n",
+        "      Open in BQ Studio\n",
+        "    </a>\n",
+        "  </td>\n",
         "</table>"
       ]
     },
diff --git a/notebooks/getting_started/bq_dataframes_template.ipynb b/notebooks/getting_started/bq_dataframes_template.ipynb
index a04c7f7907..dab4a7572f 100644
--- a/notebooks/getting_started/bq_dataframes_template.ipynb
+++ b/notebooks/getting_started/bq_dataframes_template.ipynb
@@ -50,6 +50,13 @@
         "      Open in Vertex AI Workbench\n",
         "    </a>\n",
         "  </td>\n",
+        "  <td>\n",
+        "    <a href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/getting_started/bq_dataframes_template.ipynb\">\n",
+        "      <img src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s\" alt=\"BQ logo\" width=\"35\">\n",
+        "      Open in BQ Studio\n",
+        "    </a>\n",
+        "  </td>\n",
+        "\n",
         "</table>"
       ]
     },
diff --git a/notebooks/getting_started/getting_started_bq_dataframes.ipynb b/notebooks/getting_started/getting_started_bq_dataframes.ipynb
index 3a9bb26f57..38ce75cc25 100644
--- a/notebooks/getting_started/getting_started_bq_dataframes.ipynb
+++ b/notebooks/getting_started/getting_started_bq_dataframes.ipynb
@@ -50,6 +50,12 @@
         "      Open in Vertex AI Workbench\n",
         "    </a>\n",
         "  </td>\n",
+        "  <td>\n",
+        "    <a href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/getting_started/getting_started_bq_dataframes.ipynb\">\n",
+        "      <img src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s\" alt=\"BQ logo\" width=\"35\">\n",
+        "      Open in BQ Studio\n",
+        "    </a>\n",
+        "  </td>\n",
         "</table>"
       ]
     },
diff --git a/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb b/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb
index e3c01058ea..d95447f7e5 100644
--- a/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb
+++ b/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb
@@ -50,6 +50,12 @@
         "      Open in Vertex AI Workbench\n",
         "    </a>\n",
         "  </td>\n",
+        "  <td>\n",
+        "    <a href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb\">\n",
+        "      <img src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s\" alt=\"BQ logo\" width=\"35\">\n",
+        "      Open in BQ Studio\n",
+        "    </a>\n",
+        "  </td>\n",
         "</table>"
       ]
     },
diff --git a/notebooks/ml/bq_dataframes_ml_linear_regression.ipynb b/notebooks/ml/bq_dataframes_ml_linear_regression.ipynb
index 347a3e8cff..fad2f00b31 100644
--- a/notebooks/ml/bq_dataframes_ml_linear_regression.ipynb
+++ b/notebooks/ml/bq_dataframes_ml_linear_regression.ipynb
@@ -50,6 +50,12 @@
         "      <img src=\"https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32\" alt=\"Vertex AI logo\">\n",
         "      Open in Vertex AI Workbench\n",
         "    </a>\n",
+        "  </td>\n",
+        "  <td>\n",
+        "    <a href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/ml/bq_dataframes_ml_linear_regression.ipynb\">\n",
+        "      <img src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s\" alt=\"BQ logo\" width=\"35\">\n",
+        "      Open in BQ Studio\n",
+        "    </a>\n",
         "  </td>                                                                                               \n",
         "</table>"
       ]
diff --git a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb
index f5a88b3066..641a30e104 100644
--- a/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb
+++ b/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb
@@ -19,6 +19,12 @@
     "      View on GitHub\n",
     "    </a>\n",
     "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/remote_functions/remote_function_vertex_claude_model.ipynb\">\n",
+    "      <img src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s\" alt=\"BQ logo\" width=\"35\">\n",
+    "      Open in BQ Studio\n",
+    "    </a>\n",
+    "  </td>\n",
     "</table>"
    ]
   },
diff --git a/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb b/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb
index 66a35d0046..c3b4c8e616 100644
--- a/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb
+++ b/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb
@@ -44,6 +44,12 @@
         "      View on GitHub\n",
         "    </a>\n",
         "  </td>\n",
+        "  <td>\n",
+        "    <a href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb\">\n",
+        "      <img src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw&s\" alt=\"BQ logo\" width=\"35\">\n",
+        "      Open in BQ Studio\n",
+        "    </a>\n",
+        "  </td>\n",
         "</table>"
       ]
     },

From b86335874c288a16e67017829172e76819e4876e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Tue, 7 Jan 2025 10:10:39 -0600
Subject: [PATCH 05/22] chore: update CHANGELOG.md to include commit that
 removed `ibis-framework` dependency (#1253)

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ff5ce11006..7826047761 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -97,6 +97,11 @@
 * Update df.corr, df.cov to be used with more than 30 columns case. ([#1161](https://github.com/googleapis/python-bigquery-dataframes/issues/1161)) ([9dcf1aa](https://github.com/googleapis/python-bigquery-dataframes/commit/9dcf1aa918919704dcf4d12b05935b22fb502fc6))
 
 
+### Dependencies
+
+* Remove `ibis-framework` by vendoring a fork of the package to `bigframes_vendored`. ([#1170](https://github.com/googleapis/python-bigquery-dataframes/pull/1170)) ([421d24d](https://github.com/googleapis/python-bigquery-dataframes/commit/421d24d6e61d557aa696fc701c08c84389f72ed2))
+
+
 ### Documentation
 
 * Add a code sample using `bpd.options.bigquery.ordering_mode = "partial"` ([#909](https://github.com/googleapis/python-bigquery-dataframes/issues/909)) ([f80d705](https://github.com/googleapis/python-bigquery-dataframes/commit/f80d70503b80559a0b1fe64434383aa3e028bf9b))

From c4bffc3e8ec630a362c94f9d269a66073a14ad04 Mon Sep 17 00:00:00 2001
From: Arwa Sharif <146148342+arwas11@users.noreply.github.com>
Date: Tue, 7 Jan 2025 13:40:52 -0600
Subject: [PATCH 06/22] docs: update `bigframes.pandas.pandas` docstrings
 (#1247)

* docs: update bigframes.pandas.pandas docstrings

* update read_qbq_model return type

* update read_qbq_model return section and callable return type
---
 bigframes/pandas/__init__.py                  |  3 ++-
 bigframes/session/__init__.py                 | 22 ++++++++++---------
 .../pandas/core/reshape/encoding.py           |  7 +++---
 .../pandas/core/reshape/merge.py              |  3 ++-
 .../pandas/core/reshape/tile.py               |  7 ++++--
 .../pandas/core/tools/datetimes.py            |  3 ++-
 .../bigframes_vendored/pandas/io/gbq.py       |  3 ++-
 .../pandas/io/parsers/readers.py              |  3 ++-
 8 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index 9c3c98ec55..395b573916 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -163,7 +163,8 @@ def get_default_session_id() -> str:
     the table id of all temporary tables created in the global session.
 
     Returns:
-        str, the default global session id, ex. 'sessiona1b2c'
+        str:
+            The default global session id, ex. 'sessiona1b2c'
     """
     return get_global_session().session_id
 
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index edac7efa4b..4f59e2fdd1 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1370,13 +1370,14 @@ def remote_function(
                 `all`, `internal-only`, `internal-and-gclb`. See for more details
                 https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings.
         Returns:
-            callable: A remote function object pointing to the cloud assets created
-            in the background to support the remote execution. The cloud assets can be
-            located through the following properties set in the object:
+            collections.abc.Callable:
+                A remote function object pointing to the cloud assets created
+                in the background to support the remote execution. The cloud assets can be
+                located through the following properties set in the object:
 
-            `bigframes_cloud_function` - The google cloud function deployed for the user defined code.
+                `bigframes_cloud_function` - The google cloud function deployed for the user defined code.
 
-            `bigframes_remote_function` - The bigquery remote function capable of calling into `bigframes_cloud_function`.
+                `bigframes_remote_function` - The bigquery remote function capable of calling into `bigframes_cloud_function`.
         """
         return self._remote_function_session.remote_function(
             input_types,
@@ -1545,12 +1546,13 @@ def read_gbq_function(
                 a pandas Series.
 
         Returns:
-            callable: A function object pointing to the BigQuery function read
-            from BigQuery.
+            collections.abc.Callable:
+                A function object pointing to the BigQuery function read
+                from BigQuery.
 
-            The object is similar to the one created by the `remote_function`
-            decorator, including the `bigframes_remote_function` property, but
-            not including the `bigframes_cloud_function` property.
+                The object is similar to the one created by the `remote_function`
+                decorator, including the `bigframes_remote_function` property, but
+                not including the `bigframes_cloud_function` property.
         """
 
         return bigframes_rf.read_gbq_function(
diff --git a/third_party/bigframes_vendored/pandas/core/reshape/encoding.py b/third_party/bigframes_vendored/pandas/core/reshape/encoding.py
index b7f67473ea..31b2ba4a59 100644
--- a/third_party/bigframes_vendored/pandas/core/reshape/encoding.py
+++ b/third_party/bigframes_vendored/pandas/core/reshape/encoding.py
@@ -113,8 +113,9 @@ def get_dummies(
         Data type for new columns. Only a single dtype is allowed.
 
     Returns:
-      DataFrame: Dummy-coded data. If data contains other columns than the
-      dummy-coded one(s), these will be prepended, unaltered, to the
-      result.
+      bigframes.pandas.DataFrame:
+        Dummy-coded data. If data contains other columns than the
+        dummy-coded one(s), these will be prepended, unaltered, to the
+        result.
     """
     raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
diff --git a/third_party/bigframes_vendored/pandas/core/reshape/merge.py b/third_party/bigframes_vendored/pandas/core/reshape/merge.py
index 704e50f516..66fb2c2160 100644
--- a/third_party/bigframes_vendored/pandas/core/reshape/merge.py
+++ b/third_party/bigframes_vendored/pandas/core/reshape/merge.py
@@ -74,6 +74,7 @@ def merge(
             no suffix. At least one of the values must not be None.
 
     Returns:
-        bigframes.dataframe.DataFrame: A DataFrame of the two merged objects.
+        bigframes.pandas.DataFrame:
+            A DataFrame of the two merged objects.
     """
     raise NotImplementedError("abstract method")
diff --git a/third_party/bigframes_vendored/pandas/core/reshape/tile.py b/third_party/bigframes_vendored/pandas/core/reshape/tile.py
index 6ba3950a76..6bda14b025 100644
--- a/third_party/bigframes_vendored/pandas/core/reshape/tile.py
+++ b/third_party/bigframes_vendored/pandas/core/reshape/tile.py
@@ -28,6 +28,7 @@ def cut(
 
         >>> import bigframes.pandas as bpd
         >>> bpd.options.display.progress_bar = None
+
         >>> s = bpd.Series([0, 1, 5, 10])
         >>> s
         0     0
@@ -108,7 +109,8 @@ def cut(
             bins. This affects the type of the output container.
 
     Returns:
-        Series: A Series representing the respective bin for each value
+        bigframes.pandas.Series:
+            A Series representing the respective bin for each value
             of `x`. The type depends on the value of `labels`.
             sequence of scalars : returns a Series for Series `x` or a
             Categorical for all other inputs. The values stored within
@@ -140,7 +142,8 @@ def qcut(x, q, *, labels=None, duplicates="error"):
             If bin edges are not unique, raise ValueError or drop non-uniques.
 
     Returns:
-        Series: Categorical or Series of integers if labels is False
+        bigframes.pandas.Series:
+            Categorical or Series of integers if labels is False
             The return type (Categorical or Series) depends on the input: a Series
             of type category if input is a Series else Categorical. Bins are
             represented as categories when categorical data is returned.
diff --git a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py
index 52b287b949..d6048d1208 100644
--- a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py
+++ b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py
@@ -71,6 +71,7 @@ def to_datetime(
             float number.
 
     Returns:
-        Timestamp, datetime.datetime or bigframes.series.Series: Return type depends on input.
+        Union[pandas.Timestamp, datetime.datetime, bigframes.pandas.Series]:
+            Return type depends on input.
     """
     raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py
index b4dd10ef10..aa4d862b65 100644
--- a/third_party/bigframes_vendored/pandas/io/gbq.py
+++ b/third_party/bigframes_vendored/pandas/io/gbq.py
@@ -167,6 +167,7 @@ def read_gbq(
                 from a table.
 
         Returns:
-            bigframes.pandas.DataFrame: A DataFrame representing results of the query or table.
+            bigframes.pandas.DataFrame:
+                A DataFrame representing results of the query or table.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
diff --git a/third_party/bigframes_vendored/pandas/io/parsers/readers.py b/third_party/bigframes_vendored/pandas/io/parsers/readers.py
index 3ad810fa2e..2b1e3dd70b 100644
--- a/third_party/bigframes_vendored/pandas/io/parsers/readers.py
+++ b/third_party/bigframes_vendored/pandas/io/parsers/readers.py
@@ -152,7 +152,8 @@ def read_csv(
                 keyword arguments for `pandas.read_csv` when not using the BigQuery engine.
 
         Returns:
-            bigframes.pandas.DataFrame: A BigQuery DataFrames.
+            bigframes.pandas.DataFrame:
+                A BigQuery DataFrames.
 
         Raises:
             bigframes.exceptions.DefaultIndexWarning:

From 1b40a112ed2da5c3c7248461613fdba8cd611449 Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Tue, 7 Jan 2025 12:37:13 -0800
Subject: [PATCH 07/22] chore: update comments and tests for label counts.
 (#1182)

* chore: update comments and tests for label counts.

* update docstring

* update logic and tests

* update to use named param

* metric update

* update logic

* fix

* update value and test
---
 bigframes/session/__init__.py                 |  2 +-
 bigframes/session/_io/bigquery/__init__.py    | 46 ++++++---
 bigframes/session/executor.py                 | 58 ++++-------
 bigframes/session/loader.py                   |  6 +-
 tests/system/small/test_session.py            | 12 +--
 tests/unit/session/test_io_bigquery.py        | 98 ++++++++++++++++++-
 .../bigframes_vendored/pandas/core/frame.py   |  2 +
 7 files changed, 160 insertions(+), 64 deletions(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 4f59e2fdd1..51ca6d12b2 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1592,7 +1592,7 @@ def _start_query_ml_ddl(
         job_config.destination_encryption_configuration = None
 
         return bf_io_bigquery.start_query_with_client(
-            self.bqclient, sql, job_config, metrics=self._metrics
+            self.bqclient, sql, job_config=job_config, metrics=self._metrics
         )
 
     def _create_object_table(self, path: str, connection: str) -> str:
diff --git a/bigframes/session/_io/bigquery/__init__.py b/bigframes/session/_io/bigquery/__init__.py
index b7706d34ca..6a5ba3f4c7 100644
--- a/bigframes/session/_io/bigquery/__init__.py
+++ b/bigframes/session/_io/bigquery/__init__.py
@@ -40,7 +40,7 @@
 
 
 IO_ORDERING_ID = "bqdf_row_nums"
-MAX_LABELS_COUNT = 64
+MAX_LABELS_COUNT = 64 - 8
 _LIST_TABLES_LIMIT = 10000  # calls to bqclient.list_tables
 # will be limited to this many tables
 
@@ -204,7 +204,12 @@ def format_option(key: str, value: Union[bool, str]) -> str:
     return f"{key}={repr(value)}"
 
 
-def add_labels(job_config, api_name: Optional[str] = None):
+def add_and_trim_labels(job_config, api_name: Optional[str] = None):
+    """
+    Add additional labels to the job configuration and trim the total number of labels
+    to ensure they do not exceed the maximum limit allowed by BigQuery, which is 64
+    labels per job.
+    """
     api_methods = log_adapter.get_and_reset_api_methods(dry_run=job_config.dry_run)
     job_config.labels = create_job_configs_labels(
         job_configs_labels=job_config.labels,
@@ -217,7 +222,10 @@ def start_query_with_client(
     bq_client: bigquery.Client,
     sql: str,
     job_config: bigquery.job.QueryJobConfig,
+    location: Optional[str] = None,
+    project: Optional[str] = None,
     max_results: Optional[int] = None,
+    page_size: Optional[int] = None,
     timeout: Optional[float] = None,
     api_name: Optional[str] = None,
     metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None,
@@ -225,10 +233,17 @@ def start_query_with_client(
     """
     Starts query job and waits for results.
     """
-    add_labels(job_config, api_name=api_name)
-
     try:
-        query_job = bq_client.query(sql, job_config=job_config, timeout=timeout)
+        # Note: Ensure no additional labels are added to job_config after this point,
+        # as `add_and_trim_labels` ensures the label count does not exceed 64.
+        add_and_trim_labels(job_config, api_name=api_name)
+        query_job = bq_client.query(
+            sql,
+            job_config=job_config,
+            location=location,
+            project=project,
+            timeout=timeout,
+        )
     except google.api_core.exceptions.Forbidden as ex:
         if "Drive credentials" in ex.message:
             ex.message += CHECK_DRIVE_PERMISSIONS
@@ -237,10 +252,15 @@ def start_query_with_client(
     opts = bigframes.options.display
     if opts.progress_bar is not None and not query_job.configuration.dry_run:
         results_iterator = formatting_helpers.wait_for_query_job(
-            query_job, max_results=max_results, progress_bar=opts.progress_bar
+            query_job,
+            max_results=max_results,
+            progress_bar=opts.progress_bar,
+            page_size=page_size,
         )
     else:
-        results_iterator = query_job.result(max_results=max_results)
+        results_iterator = query_job.result(
+            max_results=max_results, page_size=page_size
+        )
 
     if metrics is not None:
         metrics.count_job_stats(query_job)
@@ -304,11 +324,15 @@ def create_bq_dataset_reference(
         bigquery.DatasetReference: The constructed reference to the anonymous dataset.
     """
     job_config = google.cloud.bigquery.QueryJobConfig()
-    add_labels(job_config, api_name=api_name)
-    query_job = bq_client.query(
-        "SELECT 1", location=location, project=project, job_config=job_config
+
+    _, query_job = start_query_with_client(
+        bq_client,
+        "SELECT 1",
+        location=location,
+        job_config=job_config,
+        project=project,
+        api_name=api_name,
     )
-    query_job.result()  # blocks until finished
 
     # The anonymous dataset is used by BigQuery to write query results and
     # session tables. BigQuery DataFrames also writes temp tables directly
diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py
index 01476ed113..9ca1fa3117 100644
--- a/bigframes/session/executor.py
+++ b/bigframes/session/executor.py
@@ -48,7 +48,6 @@
 import bigframes.core.schema
 import bigframes.core.tree_properties as tree_properties
 import bigframes.features
-import bigframes.formatting_helpers as formatting_helpers
 import bigframes.session._io.bigquery as bq_io
 import bigframes.session.metrics
 import bigframes.session.planner
@@ -347,10 +346,14 @@ def export_gcs(
             format=format,
             export_options=dict(export_options),
         )
-        job_config = bigquery.QueryJobConfig()
-        bq_io.add_labels(job_config, api_name=f"dataframe-to_{format.lower()}")
-        export_job = self.bqclient.query(export_data_statement, job_config=job_config)
-        self._wait_on_job(export_job)
+
+        bq_io.start_query_with_client(
+            self.bqclient,
+            export_data_statement,
+            job_config=bigquery.QueryJobConfig(),
+            api_name=f"dataframe-to_{format.lower()}",
+            metrics=self.metrics,
+        )
         return query_job
 
     def dry_run(
@@ -358,9 +361,7 @@ def dry_run(
     ) -> bigquery.QueryJob:
         sql = self.to_sql(array_value, ordered=ordered)
         job_config = bigquery.QueryJobConfig(dry_run=True)
-        bq_io.add_labels(job_config)
         query_job = self.bqclient.query(sql, job_config=job_config)
-        _ = query_job.result()
         return query_job
 
     def peek(
@@ -487,15 +488,19 @@ def _run_execute_query(
         if not self.strictly_ordered:
             job_config.labels["bigframes-mode"] = "unordered"
 
-        # Note: add_labels is global scope which may have unexpected effects
-        bq_io.add_labels(job_config, api_name=api_name)
+        # Note: add_and_trim_labels is global scope which may have unexpected effects
+        # Ensure no additional labels are added to job_config after this point,
+        # as `add_and_trim_labels` ensures the label count does not exceed 64.
+        bq_io.add_and_trim_labels(job_config, api_name=api_name)
         try:
-            query_job = self.bqclient.query(sql, job_config=job_config)
-            return (
-                self._wait_on_job(
-                    query_job, max_results=max_results, page_size=page_size
-                ),
-                query_job,
+            return bq_io.start_query_with_client(
+                self.bqclient,
+                sql,
+                job_config=job_config,
+                api_name=api_name,
+                max_results=max_results,
+                page_size=page_size,
+                metrics=self.metrics,
             )
 
         except google.api_core.exceptions.BadRequest as e:
@@ -506,29 +511,6 @@ def _run_execute_query(
             else:
                 raise
 
-    def _wait_on_job(
-        self,
-        query_job: bigquery.QueryJob,
-        page_size: Optional[int] = None,
-        max_results: Optional[int] = None,
-    ) -> bq_table.RowIterator:
-        opts = bigframes.options.display
-        if opts.progress_bar is not None and not query_job.configuration.dry_run:
-            results_iterator = formatting_helpers.wait_for_query_job(
-                query_job,
-                progress_bar=opts.progress_bar,
-                max_results=max_results,
-                page_size=page_size,
-            )
-        else:
-            results_iterator = query_job.result(
-                max_results=max_results, page_size=page_size
-            )
-
-        if self.metrics is not None:
-            self.metrics.count_job_stats(query_job)
-        return results_iterator
-
     def replace_cached_subtrees(self, node: nodes.BigFrameNode) -> nodes.BigFrameNode:
         return nodes.top_down(
             node, lambda x: self._cached_executions.get(x, x), memoize=True
diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py
index e7579b1138..ec922e286d 100644
--- a/bigframes/session/loader.py
+++ b/bigframes/session/loader.py
@@ -707,9 +707,9 @@ def _start_query(
         return bf_io_bigquery.start_query_with_client(
             self._bqclient,
             sql,
-            job_config,
-            max_results,
-            timeout,
+            job_config=job_config,
+            max_results=max_results,
+            timeout=timeout,
             api_name=api_name,
         )
 
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index f722ccbe75..960e40465b 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -574,16 +574,10 @@ def test_read_gbq_with_custom_global_labels(
         bigframes.options.compute.extra_query_labels["test3"] = False
 
         query_job = session.read_gbq(scalars_table_id).query_job
-        job_labels = query_job.labels  # type:ignore
-        expected_labels = {"test1": "1", "test2": "abc", "test3": "false"}
-
-        # All jobs should include a bigframes-api key. See internal issue 336521938.
-        assert "bigframes-api" in job_labels
-
-        assert all(
-            job_labels.get(key) == value for key, value in expected_labels.items()
-        )
 
+        # No real job created from read_gbq, so we should expect 0 labels
+        assert query_job is not None
+        assert query_job.labels == {}
     # No labels outside of the option_context.
     assert len(bigframes.options.compute.extra_query_labels) == 0
 
diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py
index f06578ce03..36caea0c0e 100644
--- a/tests/unit/session/test_io_bigquery.py
+++ b/tests/unit/session/test_io_bigquery.py
@@ -26,6 +26,24 @@
 from tests.unit import resources
 
 
+@pytest.fixture(scope="function")
+def mock_bq_client(mocker):
+    mock_client = mocker.Mock(spec=bigquery.Client)
+    mock_query_job = mocker.Mock(spec=bigquery.QueryJob)
+    mock_row_iterator = mocker.Mock(spec=bigquery.table.RowIterator)
+
+    mock_query_job.result.return_value = mock_row_iterator
+
+    mock_destination = bigquery.DatasetReference(
+        project="mock_project", dataset_id="mock_dataset"
+    )
+    mock_query_job.destination = mock_destination
+
+    mock_client.query.return_value = mock_query_job
+
+    return mock_client
+
+
 def test_create_job_configs_labels_is_none():
     api_methods = ["agg", "series-mode"]
     labels = io_bq.create_job_configs_labels(
@@ -124,7 +142,7 @@ def test_create_job_configs_labels_length_limit_met():
         "bigframes-api": "read_pandas",
         "source": "bigquery-dataframes-temp",
     }
-    for i in range(61):
+    for i in range(53):
         key = f"bigframes-api-test-{i}"
         value = f"test{i}"
         cur_labels[key] = value
@@ -141,13 +159,89 @@ def test_create_job_configs_labels_length_limit_met():
         job_configs_labels=cur_labels, api_methods=api_methods
     )
     assert labels is not None
-    assert len(labels) == 64
+    assert len(labels) == 56
     assert "dataframe-max" in labels.values()
     assert "dataframe-head" not in labels.values()
     assert "bigframes-api" in labels.keys()
     assert "source" in labels.keys()
 
 
+def test_add_and_trim_labels_length_limit_met():
+    log_adapter.get_and_reset_api_methods()
+    cur_labels = {
+        "bigframes-api": "read_pandas",
+        "source": "bigquery-dataframes-temp",
+    }
+    for i in range(10):
+        key = f"bigframes-api-test-{i}"
+        value = f"test{i}"
+        cur_labels[key] = value
+
+    df = bpd.DataFrame(
+        {"col1": [1, 2], "col2": [3, 4]}, session=resources.create_bigquery_session()
+    )
+
+    job_config = bigquery.job.QueryJobConfig()
+    job_config.labels = cur_labels
+
+    df.max()
+    for _ in range(52):
+        df.head()
+
+    io_bq.add_and_trim_labels(job_config=job_config)
+    assert job_config.labels is not None
+    assert len(job_config.labels) == 56
+    assert "dataframe-max" not in job_config.labels.values()
+    assert "dataframe-head" in job_config.labels.values()
+    assert "bigframes-api" in job_config.labels.keys()
+    assert "source" in job_config.labels.keys()
+
+
+@pytest.mark.parametrize(
+    ("max_results", "timeout", "api_name"),
+    [(None, None, None), (100, 30.0, "test_api")],
+)
+def test_start_query_with_client_labels_length_limit_met(
+    mock_bq_client, max_results, timeout, api_name
+):
+    sql = "select * from abc"
+    cur_labels = {
+        "bigframes-api": "read_pandas",
+        "source": "bigquery-dataframes-temp",
+    }
+    for i in range(10):
+        key = f"bigframes-api-test-{i}"
+        value = f"test{i}"
+        cur_labels[key] = value
+
+    df = bpd.DataFrame(
+        {"col1": [1, 2], "col2": [3, 4]}, session=resources.create_bigquery_session()
+    )
+
+    job_config = bigquery.job.QueryJobConfig()
+    job_config.labels = cur_labels
+
+    df.max()
+    for _ in range(52):
+        df.head()
+
+    io_bq.start_query_with_client(
+        mock_bq_client,
+        sql,
+        job_config,
+        max_results=max_results,
+        timeout=timeout,
+        api_name=api_name,
+    )
+
+    assert job_config.labels is not None
+    assert len(job_config.labels) == 56
+    assert "dataframe-max" not in job_config.labels.values()
+    assert "dataframe-head" in job_config.labels.values()
+    assert "bigframes-api" in job_config.labels.keys()
+    assert "source" in job_config.labels.keys()
+
+
 def test_create_temp_table_default_expiration():
     """Make sure the created table has an expiration."""
     expiration = datetime.datetime(
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 053ed7b94c..f1565ed536 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -6771,6 +6771,7 @@ def iat(self):
         **Examples:**
 
             >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
             >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
             ...                    columns=['A', 'B', 'C'])
             >>> bpd.options.display.progress_bar = None
@@ -6804,6 +6805,7 @@ def at(self):
         **Examples:**
 
             >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
             >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
             ...   index=[4, 5, 6], columns=['A', 'B', 'C'])
             >>> bpd.options.display.progress_bar = None

From 5934f8ee0a1c950a820d1911d73a46f6891a40bb Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@users.noreply.github.com>
Date: Tue, 7 Jan 2025 14:19:37 -0800
Subject: [PATCH 08/22] feat: Support DataFrame.astype(dict) (#1262)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [WIP] Support dict dtypes for df.astype()

* feat: Support DataFrame.astype(dict)

* add test cases for failure

* remove test notebook

* re-write type-checking logic to make mypy happy

* fix format

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* use reflection in dtypes module, and update error type

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
---
 bigframes/dataframe.py               | 27 ++++++++++++++++++++++---
 bigframes/dtypes.py                  |  4 ++++
 tests/system/small/test_dataframe.py | 30 ++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index efbe56abf7..0503a38ae6 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -367,14 +367,35 @@ def __iter__(self):
 
     def astype(
         self,
-        dtype: Union[bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype],
+        dtype: Union[
+            bigframes.dtypes.DtypeString,
+            bigframes.dtypes.Dtype,
+            dict[str, Union[bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype]],
+        ],
         *,
         errors: Literal["raise", "null"] = "raise",
     ) -> DataFrame:
         if errors not in ["raise", "null"]:
             raise ValueError("Arg 'error' must be one of 'raise' or 'null'")
-        return self._apply_unary_op(
-            ops.AsTypeOp(to_type=dtype, safe=(errors == "null"))
+
+        safe_cast = errors == "null"
+
+        # Type strings check
+        if dtype in bigframes.dtypes.DTYPE_STRINGS:
+            return self._apply_unary_op(ops.AsTypeOp(dtype, safe_cast))
+
+        # Type instances check
+        if type(dtype) in bigframes.dtypes.DTYPES:
+            return self._apply_unary_op(ops.AsTypeOp(dtype, safe_cast))
+
+        if isinstance(dtype, dict):
+            result = self.copy()
+            for col, to_type in dtype.items():
+                result[col] = result[col].astype(to_type)
+            return result
+
+        raise TypeError(
+            f"Invalid type {type(dtype)} for dtype input. {constants.FEEDBACK_LINK}"
         )
 
     def _to_sql_query(
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index ff3e7a31fb..6e179225ea 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -36,6 +36,8 @@
     pd.ArrowDtype,
     gpd.array.GeometryDtype,
 ]
+
+DTYPES = typing.get_args(Dtype)
 # Represents both column types (dtypes) and local-only types
 # None represents the type of a None scalar.
 ExpressionType = typing.Optional[Dtype]
@@ -238,6 +240,8 @@ class SimpleDtypeInfo:
     "binary[pyarrow]",
 ]
 
+DTYPE_STRINGS = typing.get_args(DtypeString)
+
 BOOL_BIGFRAMES_TYPES = [BOOL_DTYPE]
 
 # Corresponds to the pandas concept of numeric type (such as when 'numeric_only' is specified in an operation)
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index bae71b33be..4e0e5c2739 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -5199,3 +5199,33 @@ def test__resample_start_time(rule, origin, data):
     pd.testing.assert_frame_equal(
         bf_result, pd_result, check_dtype=False, check_index_type=False
     )
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        pytest.param("string[pyarrow]", id="type-string"),
+        pytest.param(pd.StringDtype(storage="pyarrow"), id="type-literal"),
+        pytest.param(
+            {"bool_col": "string[pyarrow]", "int64_col": pd.Float64Dtype()},
+            id="multiple-types",
+        ),
+    ],
+)
+def test_astype(scalars_dfs, dtype):
+    bf_df, pd_df = scalars_dfs
+    target_cols = ["bool_col", "int64_col"]
+    bf_df = bf_df[target_cols]
+    pd_df = pd_df[target_cols]
+
+    bf_result = bf_df.astype(dtype).to_pandas()
+    pd_result = pd_df.astype(dtype)
+
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_astype_invalid_type_fail(scalars_dfs):
+    bf_df, _ = scalars_dfs
+
+    with pytest.raises(TypeError, match=r".*Share your usecase with.*"):
+        bf_df.astype(123)

From bb7a85005ebebfbcb0d2a4d5c4c27b354f38d3d1 Mon Sep 17 00:00:00 2001
From: Jiaxun Wu <35040939+jiaxunwu@users.noreply.github.com>
Date: Tue, 7 Jan 2025 21:02:59 -0800
Subject: [PATCH 09/22] docs: use 002 model for better scalability in text
 generation (#1270)

---
 notebooks/experimental/semantic_operators.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebooks/experimental/semantic_operators.ipynb b/notebooks/experimental/semantic_operators.ipynb
index b3b989dd43..d3fec469b4 100644
--- a/notebooks/experimental/semantic_operators.ipynb
+++ b/notebooks/experimental/semantic_operators.ipynb
@@ -188,7 +188,7 @@
       "source": [
         "Create LLM instances. They will be passed in as parameters for each semantic operator.\n",
         "\n",
-        "This tutorial uses the \"gemini-1.5-flash-001\" model for text generation and \"text-embedding-005\" for embedding. While these are recommended, you can choose [other Vertex AI LLM models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models) based on your needs and availability. Ensure you have [sufficient quota](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas) for your chosen models and adjust it if necessary."
+        "This tutorial uses the \"gemini-1.5-flash-002\" model for text generation and \"text-embedding-005\" for embedding. While these are recommended, you can choose [other Vertex AI LLM models](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models) based on your needs and availability. Ensure you have [sufficient quota](https://cloud.google.com/vertex-ai/generative-ai/docs/quotas) for your chosen models and adjust it if necessary."
       ]
     },
     {

From 059a564095dfea0518982f13c8118d3807861ccf Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Wed, 8 Jan 2025 12:45:09 -0600
Subject: [PATCH 10/22] docs: add snippet to see the ARIMA coefficients in the
 Forecast a single time series with a univariate model tutorial (#1268)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* merge main

* details

* Update samples/snippets/create_single_timeseries_forecasting_model_test.py

Co-authored-by: Tim Sweña (Swast) <swast@google.com>

* fix

* edit

---------

Co-authored-by: Tim Sweña (Swast) <swast@google.com>
---
 ...create_single_timeseries_forecasting_model_test.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index 0e69eba3dd..d59c3ab5a7 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -64,6 +64,17 @@ def test_create_single_timeseries() -> None:
 
     model.fit(X, y)
     # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_create]
+
+    # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial_evaluate]
+    coef = model.coef_
+    print(coef.peek())
+
+    # Expected output:
+    #       ar_coefficients   ma_coefficients   intercept_or_drift
+    #   0	 [0.40944762]	   [-0.81168198]	      0.0
+
+    # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_evaluate]
+    assert coef is not None
     assert model is not None
     assert parsed_date is not None
     assert total_visits is not None

From 27bbd8085ccac175f113afbd6c94b52c034a3d97 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Wed, 8 Jan 2025 13:52:32 -0800
Subject: [PATCH 11/22] feat: bigframes.bigquery.parse_json (#1265)

* feat: bigframes.bigquery.parse_json

* add preview doc and warning

* nit
---
 bigframes/bigquery/__init__.py               | 11 +++++-
 bigframes/bigquery/_operations/json.py       | 40 ++++++++++++++++++++
 bigframes/core/compile/scalar_op_compiler.py | 12 ++++--
 bigframes/core/utils.py                      | 25 ++++++++++++
 bigframes/operations/__init__.py             | 17 +++++++--
 bigframes/operations/blob.py                 |  4 +-
 tests/system/small/bigquery/test_json.py     |  5 +++
 7 files changed, 103 insertions(+), 11 deletions(-)

diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index a39914d6e7..ff52ae8d36 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -27,20 +27,27 @@
     json_extract_array,
     json_extract_string_array,
     json_set,
+    parse_json,
 )
 from bigframes.bigquery._operations.search import create_vector_index, vector_search
 from bigframes.bigquery._operations.struct import struct
 
 __all__ = [
+    # approximate aggregate ops
+    "approx_top_count",
+    # array ops
     "array_length",
     "array_agg",
     "array_to_string",
+    # json ops
     "json_set",
     "json_extract",
     "json_extract_array",
     "json_extract_string_array",
-    "approx_top_count",
-    "struct",
+    "parse_json",
+    # search ops
     "create_vector_index",
     "vector_search",
+    # struct ops
+    "struct",
 ]
diff --git a/bigframes/bigquery/_operations/json.py b/bigframes/bigquery/_operations/json.py
index 843991807e..52b01d3ef7 100644
--- a/bigframes/bigquery/_operations/json.py
+++ b/bigframes/bigquery/_operations/json.py
@@ -23,6 +23,7 @@
 
 from typing import Any, cast, Optional, Sequence, Tuple, Union
 
+import bigframes.core.utils as utils
 import bigframes.dtypes
 import bigframes.operations as ops
 import bigframes.series as series
@@ -30,6 +31,7 @@
 from . import array
 
 
+@utils.preview(name="The JSON-related API `json_set`")
 def json_set(
     input: series.Series,
     json_path_value_pairs: Sequence[Tuple[str, Any]],
@@ -37,6 +39,10 @@ def json_set(
     """Produces a new JSON value within a Series by inserting or replacing values at
     specified paths.
 
+    .. warning::
+        The JSON-related API `json_set` is in preview. Its behavior may change in
+        future versions.
+
     **Examples:**
 
         >>> import bigframes.pandas as bpd
@@ -223,3 +229,37 @@ def json_extract_string_array(
             ),
         )
     return array_series
+
+
+@utils.preview(name="The JSON-related API `parse_json`")
+def parse_json(
+    input: series.Series,
+) -> series.Series:
+    """Converts a series with a JSON-formatted STRING value to a JSON value.
+
+    .. warning::
+        The JSON-related API `parse_json` is in preview. Its behavior may change in
+        future versions.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> bpd.options.display.progress_bar = None
+
+        >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}'])
+        >>> s
+        0    {"class": {"students": [{"id": 5}, {"id": 12}]}}
+        dtype: string
+        >>> bbq.parse_json(s)
+        0    {"class":{"students":[{"id":5},{"id":12}]}}
+        dtype: large_string[pyarrow]
+
+    Args:
+        input (bigframes.series.Series):
+            The Series containing JSON-formatted strings.
+
+    Returns:
+        bigframes.series.Series: A new Series with the JSON value.
+    """
+    return input._apply_unary_op(ops.ParseJSON())
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index 2b85a97483..d594cb3d68 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -20,7 +20,6 @@
 import bigframes_vendored.constants as constants
 import bigframes_vendored.ibis.expr.api as ibis_api
 import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes
-import bigframes_vendored.ibis.expr.operations as ibis_ops
 import bigframes_vendored.ibis.expr.operations.generic as ibis_generic
 import bigframes_vendored.ibis.expr.operations.udf as ibis_udf
 import bigframes_vendored.ibis.expr.types as ibis_types
@@ -1181,13 +1180,13 @@ def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):
         )
     else:
         # Enabling JSON type eliminates the need for less efficient string conversions.
-        return ibis_ops.ToJsonString(
+        return to_json_string(
             json_set(  # type: ignore
-                json_obj=parse_json(x),
+                json_obj=parse_json(json_str=x),
                 json_path=op.json_path,
                 json_value=y,
             )
-        ).to_expr()
+        )
 
 
 @scalar_op_compiler.register_unary_op(ops.JSONExtract, pass_op=True)
@@ -1210,6 +1209,11 @@ def json_extract_string_array_op_impl(
     return json_extract_string_array(json_obj=x, json_path=op.json_path)
 
 
+@scalar_op_compiler.register_unary_op(ops.ParseJSON, pass_op=True)
+def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON):
+    return parse_json(json_str=x)
+
+
 @scalar_op_compiler.register_unary_op(ops.ToJSONString)
 def to_json_string_op_impl(json_obj: ibis_types.Value):
     return to_json_string(json_obj=json_obj)
diff --git a/bigframes/core/utils.py b/bigframes/core/utils.py
index e684ac55a4..3bafa380bf 100644
--- a/bigframes/core/utils.py
+++ b/bigframes/core/utils.py
@@ -11,14 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import functools
 import re
 import typing
 from typing import Hashable, Iterable, List
+import warnings
 
 import bigframes_vendored.pandas.io.common as vendored_pandas_io_common
 import pandas as pd
 import typing_extensions
 
+import bigframes.exceptions as exc
+
 UNNAMED_COLUMN_ID = "bigframes_unnamed_column"
 UNNAMED_INDEX_ID = "bigframes_unnamed_index"
 
@@ -164,3 +168,24 @@ def merge_column_labels(
             result_labels.append(col_label)
 
     return pd.Index(result_labels)
+
+
+def warn_preview(msg=""):
+    """Emit a warning that a preview API is being used."""
+    warnings.warn(msg, exc.PreviewWarning)
+
+
+def preview(*, name: str):
+    """Decorate to warn of a preview API."""
+
+    def decorator(func):
+        msg = f"{name} is in preview. Its behavior may change in future versions."
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            warn_preview(msg=msg)
+            return func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index 03d9d60d5f..37a40b7d01 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -740,6 +740,20 @@ def output_type(self, *input_types):
         )
 
 
+@dataclasses.dataclass(frozen=True)
+class ParseJSON(UnaryOp):
+    name: typing.ClassVar[str] = "parse_json"
+
+    def output_type(self, *input_types):
+        input_type = input_types[0]
+        if input_type != dtypes.STRING_DTYPE:
+            raise TypeError(
+                "Input type must be an valid JSON-formatted string type."
+                + f" Received type: {input_type}"
+            )
+        return dtypes.JSON_DTYPE
+
+
 @dataclasses.dataclass(frozen=True)
 class ToJSONString(UnaryOp):
     name: typing.ClassVar[str] = "to_json_string"
@@ -754,9 +768,6 @@ def output_type(self, *input_types):
         return dtypes.STRING_DTYPE
 
 
-to_json_string_op = ToJSONString()
-
-
 ## Blob Ops
 @dataclasses.dataclass(frozen=True)
 class ObjGetAccessUrl(UnaryOp):
diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py
index 898d56ab83..d41cdf2b2d 100644
--- a/bigframes/operations/blob.py
+++ b/bigframes/operations/blob.py
@@ -110,8 +110,8 @@ def image_blur(
         )
         dst_rt = dst._apply_unary_op(ops.ObjGetAccessUrl(mode="RW"))
 
-        src_rt = src_rt._apply_unary_op(ops.to_json_string_op)
-        dst_rt = dst_rt._apply_unary_op(ops.to_json_string_op)
+        src_rt = src_rt._apply_unary_op(ops.ToJSONString())
+        dst_rt = dst_rt._apply_unary_op(ops.ToJSONString())
 
         df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer")
         df["ksize_x"], df["ksize_y"] = ksize
diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py
index 3096897c80..b01ac3aaf2 100644
--- a/tests/system/small/bigquery/test_json.py
+++ b/tests/system/small/bigquery/test_json.py
@@ -209,3 +209,8 @@ def test_json_in_struct():
         "SELECT STRUCT(JSON '{\\\"a\\\": 1}' AS data, 1 AS number) as struct_col"
     )
     assert df["struct_col"].struct.field("data")[0] == '{"a":1}'
+
+
+def test_parse_json_w_invalid_series_type():
+    with pytest.raises(TypeError):
+        bbq.parse_json(bpd.Series([1, 2]))

From 2c771aa9fd74be0d8420425de983f0d3e0330610 Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Wed, 8 Jan 2025 16:56:03 -0600
Subject: [PATCH 12/22] chore: rename snippet (#1275)

---
 .../create_single_timeseries_forecasting_model_test.py        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index d59c3ab5a7..6801425fe5 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -65,7 +65,7 @@ def test_create_single_timeseries() -> None:
     model.fit(X, y)
     # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_create]
 
-    # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial_evaluate]
+    # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial_coef]
     coef = model.coef_
     print(coef.peek())
 
@@ -73,7 +73,7 @@ def test_create_single_timeseries() -> None:
     #       ar_coefficients   ma_coefficients   intercept_or_drift
     #   0	 [0.40944762]	   [-0.81168198]	      0.0
 
-    # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_evaluate]
+    # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_coef]
     assert coef is not None
     assert model is not None
     assert parsed_date is not None

From 088b18320d18193940cb6e5a20b34fb0f4c30ac6 Mon Sep 17 00:00:00 2001
From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com>
Date: Wed, 8 Jan 2025 15:51:01 -0800
Subject: [PATCH 13/22] chore: add experimental image blob preview in
 DataFrame._repr_html_ (#1276)

---
 bigframes/dataframe.py | 42 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 0503a38ae6..e7a0444af1 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -739,10 +739,23 @@ def _repr_html_(self) -> str:
         if opts.repr_mode == "deferred":
             return formatter.repr_query_job(self._compute_dry_run())
 
+        df = self.copy()
+        if bigframes.options.experiments.blob:
+            import bigframes.bigquery as bbq
+
+            blob_cols = [
+                col
+                for col in df.columns
+                if df[col].dtype == bigframes.dtypes.OBJ_REF_DTYPE
+            ]
+            for col in blob_cols:
+                df[col] = df[col]._apply_unary_op(ops.ObjGetAccessUrl(mode="R"))
+                df[col] = bbq.json_extract(df[col], "$.access_urls.read_url")
+
         # TODO(swast): pass max_columns and get the true column count back. Maybe
         # get 1 more column than we have requested so that pandas can add the
         # ... for us?
-        pandas_df, row_count, query_job = self._block.retrieve_repr_request_results(
+        pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
             max_results
         )
 
@@ -751,8 +764,31 @@ def _repr_html_(self) -> str:
         column_count = len(pandas_df.columns)
 
         with display_options.pandas_repr(opts):
-            # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
-            html_string = pandas_df._repr_html_()  # type:ignore
+            # Allows previewing images in the DataFrame. This implementation also changes the string repr: it doesn't truncate strings or escape HTML characters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas.
+            if bigframes.options.experiments.blob:
+
+                def url_to_image_html(url: str) -> str:
+                    # url is a json string, which already contains double-quotes ""
+                    return f"<img src={url}>"
+
+                formatters = {blob_col: url_to_image_html for blob_col in blob_cols}
+
+                # set max_colwidth so as not to truncate the image url
+                with pandas.option_context("display.max_colwidth", None):
+                    max_rows = pandas.get_option("display.max_rows")
+                    max_cols = pandas.get_option("display.max_columns")
+                    show_dimensions = pandas.get_option("display.show_dimensions")
+                    html_string = pandas_df.to_html(
+                        escape=False,
+                        notebook=True,
+                        max_rows=max_rows,
+                        max_cols=max_cols,
+                        show_dimensions=show_dimensions,
+                        formatters=formatters,  # type: ignore
+                    )
+            else:
+                # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
+                html_string = pandas_df._repr_html_()  # type:ignore
 
         html_string += f"[{row_count} rows x {column_count} columns in total]"
         return html_string

From c16b3a894e101cee1344280c991b606503e9e3f2 Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Thu, 9 Jan 2025 12:35:04 -0600
Subject: [PATCH 14/22] chore: add _test to file name (#1274)

* add _test to file name

* chore: edit file name
---
 ...l.py => create_multiple_timeseries_forecasting_model_test.py} | 1 -
 1 file changed, 1 deletion(-)
 rename samples/snippets/{create_multiple_timeseries_forecasting_model.py => create_multiple_timeseries_forecasting_model_test.py} (99%)

diff --git a/samples/snippets/create_multiple_timeseries_forecasting_model.py b/samples/snippets/create_multiple_timeseries_forecasting_model_test.py
similarity index 99%
rename from samples/snippets/create_multiple_timeseries_forecasting_model.py
rename to samples/snippets/create_multiple_timeseries_forecasting_model_test.py
index b749c37d50..e414fdea9c 100644
--- a/samples/snippets/create_multiple_timeseries_forecasting_model.py
+++ b/samples/snippets/create_multiple_timeseries_forecasting_model_test.py
@@ -17,7 +17,6 @@ def test_multiple_timeseries_forecasting_model(random_model_id: str) -> None:
     your_model_id = random_model_id
 
     # [START bigquery_dataframes_bqml_arima_multiple_step_2_visualize]
-
     import bigframes.pandas as bpd
 
     df = bpd.read_gbq("bigquery-public-data.new_york.citibike_trips")

From 3dcae2dca45efdd4493cf3f367bf025ea291f4df Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Thu, 9 Jan 2025 16:15:46 -0600
Subject: [PATCH 15/22] docs: add snippet to evaluate ARIMA plus model in the
 Forecast a single time series with a univariate model tutorial (#1267)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* merge main

* details

* Update samples/snippets/create_single_timeseries_forecasting_model_test.py

Co-authored-by: Tim Sweña (Swast) <swast@google.com>

* expand ibis to bq type conversion

* added unit test for from_ibis

---------

Co-authored-by: Tim Sweña (Swast) <swast@google.com>
---
 ...ingle_timeseries_forecasting_model_test.py | 20 ++++++++++++++++++-
 tests/unit/core/test_dtypes.py                | 10 ++++++++++
 .../ibis/backends/bigquery/datatypes.py       |  2 ++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index 6801425fe5..b66860418b 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -72,9 +72,27 @@ def test_create_single_timeseries() -> None:
     # Expected output:
     #       ar_coefficients   ma_coefficients   intercept_or_drift
     #   0	 [0.40944762]	   [-0.81168198]	      0.0
-
     # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_coef]
+
+    # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial_evaluate]
+    # Evaluate the time series models by using the summary() function. The summary()
+    # function shows you the evaluation metrics of all the candidate models evaluated
+    # during the process of automatic hyperparameter tuning.
+    summary = model.summary(
+        show_all_candidate_models=True,
+    )
+    print(summary.peek())
+
+    # Expected output:
+    # row   non_seasonal_p	non_seasonal_d	non_seasonal_q	has_drift	log_likelihood	AIC	variance	seasonal_periods	has_holiday_effect	has_spikes_and_dips	has_step_changes	error_message
+    #  0	      0	              1	               3	      True	     -2464.255656	4938.511313	     42772.506055	        ['WEEKLY']	            False	        False	            True
+    #  1	      2	              1	               0	      False	     -2473.141651	4952.283303	     44942.416463	        ['WEEKLY']	            False	        False	            True
+    #  2	      1	              1	               0 	      False	     -2479.880885	4963.76177	     46642.953433	        ['WEEKLY']	            False	        False	            True
+    #  3	      0	              1	               1	      False	     -2470.632377	4945.264753	     44319.379307	        ['WEEKLY']	            False	        False	            True
+    #  4	      2	              1	               1	      True	     -2463.671247	4937.342493	     42633.299513	        ['WEEKLY']	            False	        False	            True
+    # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_evaluate]
     assert coef is not None
+    assert summary is not None
     assert model is not None
     assert parsed_date is not None
     assert total_visits is not None
diff --git a/tests/unit/core/test_dtypes.py b/tests/unit/core/test_dtypes.py
index 83a643f6e1..a5b0889bf9 100644
--- a/tests/unit/core/test_dtypes.py
+++ b/tests/unit/core/test_dtypes.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import bigframes_vendored.ibis.backends.bigquery.datatypes as ibis_bq_types
 import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes
 import bigframes_vendored.ibis.expr.types as ibis_types
 import geopandas as gpd  # type: ignore
@@ -152,6 +153,15 @@ def test_ibis_dtype_to_arrow_dtype(ibis_dtype, arrow_dtype):
     assert result == arrow_dtype
 
 
+@pytest.mark.parametrize(
+    ("ibis_dtype", "bigquery_type"),
+    [(ibis_dtypes.String(), "STRING"), (ibis_dtypes.String(nullable=False), "STRING")],
+)
+def test_ibis_dtype_to_bigquery_type(ibis_dtype, bigquery_type):
+    result = ibis_bq_types.BigQueryType.from_ibis(ibis_dtype)
+    assert result == bigquery_type
+
+
 @pytest.mark.parametrize(
     ["bigframes_dtype", "ibis_dtype"],
     [
diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/datatypes.py b/third_party/bigframes_vendored/ibis/backends/bigquery/datatypes.py
index baf20dff75..5b4e4d85a1 100644
--- a/third_party/bigframes_vendored/ibis/backends/bigquery/datatypes.py
+++ b/third_party/bigframes_vendored/ibis/backends/bigquery/datatypes.py
@@ -55,6 +55,8 @@ def from_ibis(cls, dtype: dt.DataType) -> str:
             return "INT64"
         elif dtype.is_binary():
             return "BYTES"
+        elif dtype.is_string():
+            return "STRING"
         elif dtype.is_date():
             return "DATE"
         elif dtype.is_timestamp():

From 77c8898580581f79258638b055c2573491cd32ba Mon Sep 17 00:00:00 2001
From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com>
Date: Thu, 9 Jan 2025 15:32:02 -0800
Subject: [PATCH 16/22] chore: add experimental blob.image_blur() dst as folder
 (#1279)

* chore: add experimental blob.image_blur() dst as folder

* fix
---
 bigframes/operations/blob.py | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py
index d41cdf2b2d..f78db2b6fc 100644
--- a/bigframes/operations/blob.py
+++ b/bigframes/operations/blob.py
@@ -14,12 +14,14 @@
 
 from __future__ import annotations
 
-from typing import Optional
+import os
+from typing import cast, Optional, Union
 
 import IPython.display as ipy_display
 import requests
 
 from bigframes import clients
+import bigframes.dataframe
 from bigframes.operations import base
 import bigframes.operations as ops
 import bigframes.series
@@ -74,7 +76,7 @@ def image_blur(
         self,
         ksize: tuple[int, int],
         *,
-        dst: bigframes.series.Series,
+        dst: Union[str, bigframes.series.Series],
         connection: Optional[str] = None,
     ) -> bigframes.series.Series:
         """Blurs images.
@@ -84,11 +86,11 @@ def image_blur(
 
         Args:
             ksize (tuple(int, int)): Kernel size.
-            dst (bigframes.series.Series): Destination blob series.
-            connection (str or None, default None): BQ connection used for internet transactions. If None, uses default connection of the session.
+            dst (str or bigframes.series.Series): Destination GCS folder str or blob series.
+            connection (str or None, default None): BQ connection used for the function's internet transactions, and for the output blob if "dst" is a str. If None, uses the default connection of the session.
 
         Returns:
-            JSON: Runtime info of the Blob.
+            BigFrames Blob Series
         """
         import bigframes.blob._functions as blob_func
 
@@ -99,6 +101,15 @@ def image_blur(
             default_location=self._block.session._location,
         )
 
+        if isinstance(dst, str):
+            dst = os.path.join(dst, "")
+            src_uri = bigframes.series.Series(self._block).struct.explode()["uri"]
+            # Replace src folder with dst folder, keep the file names.
+            dst_uri = src_uri.str.replace(r"^.*\/(.*)$", rf"{dst}\1", regex=True)
+            dst = cast(
+                bigframes.series.Series, dst_uri.str.to_blob(connection=connection)
+            )
+
         image_blur_udf = blob_func.TransformFunction(
             blob_func.image_blur_def,
             session=self._block.session,
@@ -116,4 +127,7 @@ def image_blur(
         df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer")
         df["ksize_x"], df["ksize_y"] = ksize
 
-        return df.apply(image_blur_udf, axis=1)
+        res = df.apply(image_blur_udf, axis=1)
+        res.cache()  # to execute the udf
+
+        return dst

From c71ec093314409cd4c7a52a713dbd6164fbbd792 Mon Sep 17 00:00:00 2001
From: Alexander Butler <41213451+z3z1ma@users.noreply.github.com>
Date: Fri, 10 Jan 2025 15:02:03 -0700
Subject: [PATCH 17/22] deps: relax sqlglot upper bound (#1278)

---
 setup.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/setup.py b/setup.py
index f08f53eaf7..e3fda9d36f 100644
--- a/setup.py
+++ b/setup.py
@@ -57,14 +57,11 @@
     "requests >=2.27.1",
     "scikit-learn >=1.2.2",
     "sqlalchemy >=1.4,<3.0dev",
-    # Keep sqlglot versions in sync with ibis-framework. This avoids problems
-    # where the incorrect version of sqlglot is installed, such as
-    # https://github.com/googleapis/python-bigquery-dataframes/issues/315
-    "sqlglot >=23.6.3,<25.2",
-    "tabulate >= 0.9",
+    "sqlglot >=23.6.3",
+    "tabulate >=0.9",
     "ipywidgets >=7.7.1",
-    "humanize >= 4.6.0",
-    "matplotlib >= 3.7.1",
+    "humanize >=4.6.0",
+    "matplotlib >=3.7.1",
     # For vendored ibis-framework.
     "atpublic>=2.3,<6",
     "parsy>=2,<3",

From f91756a4413b10f1072c0ae96301fe854bb1ba4e Mon Sep 17 00:00:00 2001
From: TrevorBergeron <tbergeron@google.com>
Date: Mon, 13 Jan 2025 13:27:52 -0800
Subject: [PATCH 18/22] fix: Fix erroneous window bounds removal during
 compilation (#1163)

---
 bigframes/core/block_transforms.py            |   3 +-
 bigframes/core/compile/aggregate_compiler.py  |   9 +
 bigframes/core/compile/compiled.py            |   2 +-
 bigframes/operations/aggregations.py          |  13 +
 tests/system/small/test_unordered.py          |  21 +
 .../ibis/backends/sql/compilers/bigquery.py   | 770 ------------------
 .../sql/compilers/bigquery/__init__.py        |  51 +-
 .../ibis/backends/sql/rewrites.py             |  25 +-
 .../ibis/expr/operations/window.py            |   5 +-
 9 files changed, 84 insertions(+), 815 deletions(-)
 delete mode 100644 third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery.py

diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py
index 785691edd6..a7f75e7264 100644
--- a/bigframes/core/block_transforms.py
+++ b/bigframes/core/block_transforms.py
@@ -86,9 +86,10 @@ def indicate_duplicates(
         # Discard this value if there are copies ANYWHERE
         window_spec = windows.unbound(grouping_keys=tuple(columns))
     block, dummy = block.create_constant(1)
+    # sum a constant 1 to count rows in the window; works even with partial ordering
     block, val_count_col_id = block.apply_window_op(
         dummy,
-        agg_ops.count_op,
+        agg_ops.sum_op,
         window_spec=window_spec,
     )
     block, duplicate_indicator = block.project_expr(
diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py
index 482c38ae3d..f97856efa5 100644
--- a/bigframes/core/compile/aggregate_compiler.py
+++ b/bigframes/core/compile/aggregate_compiler.py
@@ -479,6 +479,15 @@ def _(
     return _apply_window_if_present(column.dense_rank(), window) + 1
 
 
+@compile_unary_agg.register
+def _(
+    op: agg_ops.RowNumberOp,
+    column: ibis_types.Column,
+    window=None,
+) -> ibis_types.IntegerValue:
+    return _apply_window_if_present(ibis_api.row_number(), window)
+
+
 @compile_unary_agg.register
 def _(op: agg_ops.FirstOp, column: ibis_types.Column, window=None) -> ibis_types.Value:
     return _apply_window_if_present(column.first(), window)
diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py
index d4c814145b..f879eb3feb 100644
--- a/bigframes/core/compile/compiled.py
+++ b/bigframes/core/compile/compiled.py
@@ -1330,7 +1330,7 @@ def _ibis_window_from_spec(
             if require_total_order or isinstance(window_spec.bounds, RowsWindowBounds):
                 # Some operators need an unambiguous ordering, so the table's total ordering is appended
                 order_by = tuple([*order_by, *self._ibis_order])
-        elif isinstance(window_spec.bounds, RowsWindowBounds):
+        elif require_total_order or isinstance(window_spec.bounds, RowsWindowBounds):
             # If window spec has following or preceding bounds, we need to apply an unambiguous ordering.
             order_by = tuple(self._ibis_order)
         else:
diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py
index 6b7f56d708..9de58fe5db 100644
--- a/bigframes/operations/aggregations.py
+++ b/bigframes/operations/aggregations.py
@@ -379,6 +379,19 @@ def skips_nulls(self):
         return True
 
 
+# This should really be a NullaryWindowOp, but APIs don't support that yet.
+@dataclasses.dataclass(frozen=True)
+class RowNumberOp(UnaryWindowOp):
+    name: ClassVar[str] = "rownumber"
+
+    @property
+    def skips_nulls(self):
+        return False
+
+    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
+        return dtypes.INT_DTYPE
+
+
 @dataclasses.dataclass(frozen=True)
 class RankOp(UnaryWindowOp):
     name: ClassVar[str] = "rank"
diff --git a/tests/system/small/test_unordered.py b/tests/system/small/test_unordered.py
index fe3411e266..106997f3e9 100644
--- a/tests/system/small/test_unordered.py
+++ b/tests/system/small/test_unordered.py
@@ -152,6 +152,27 @@ def test_unordered_merge(unordered_session):
     assert_pandas_df_equal(bf_result.to_pandas(), pd_result, ignore_order=True)
 
 
+def test_unordered_drop_duplicates_ambiguous(unordered_session):
+    pd_df = pd.DataFrame(
+        {"a": [1, 1, 1], "b": [4, 4, 6], "c": [1, 1, 3]}, dtype=pd.Int64Dtype()
+    )
+    bf_df = bpd.DataFrame(pd_df, session=unordered_session)
+
+    # merge first to discard original ordering
+    bf_result = (
+        bf_df.merge(bf_df, left_on="a", right_on="c")
+        .sort_values("c_y")
+        .drop_duplicates()
+    )
+    pd_result = (
+        pd_df.merge(pd_df, left_on="a", right_on="c")
+        .sort_values("c_y")
+        .drop_duplicates()
+    )
+
+    assert_pandas_df_equal(bf_result.to_pandas(), pd_result, ignore_order=True)
+
+
 def test_unordered_mode_cache_preserves_order(unordered_session):
     pd_df = pd.DataFrame(
         {"a": [1, 2, 3, 4, 5, 6], "b": [4, 5, 9, 3, 1, 6]}, dtype=pd.Int64Dtype()
diff --git a/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery.py b/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery.py
deleted file mode 100644
index c090a1ca8f..0000000000
--- a/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery.py
+++ /dev/null
@@ -1,770 +0,0 @@
-"""Module to convert from Ibis expression to SQL string."""
-
-from __future__ import annotations
-
-import re
-
-from bigframes_vendored.ibis import util
-from bigframes_vendored.ibis.backends.sql.compilers.base import (
-    NULL,
-    SQLGlotCompiler,
-    STAR,
-)
-from bigframes_vendored.ibis.backends.sql.datatypes import BigQueryType, BigQueryUDFType
-from bigframes_vendored.ibis.backends.sql.rewrites import (
-    exclude_unsupported_window_frame_from_ops,
-    exclude_unsupported_window_frame_from_rank,
-    exclude_unsupported_window_frame_from_row_number,
-)
-import bigframes_vendored.ibis.common.exceptions as com
-from bigframes_vendored.ibis.common.temporal import (
-    DateUnit,
-    IntervalUnit,
-    TimestampUnit,
-    TimeUnit,
-)
-import bigframes_vendored.ibis.expr.datatypes as dt
-import bigframes_vendored.ibis.expr.operations as ops
-import sqlglot as sg
-from sqlglot.dialects import BigQuery
-import sqlglot.expressions as sge
-
-_NAME_REGEX = re.compile(r'[^!"$()*,./;?@[\\\]^`{}~\n]+')
-
-
-class BigQueryCompiler(SQLGlotCompiler):
-    dialect = BigQuery
-    type_mapper = BigQueryType
-    udf_type_mapper = BigQueryUDFType
-    rewrites = (
-        exclude_unsupported_window_frame_from_ops,
-        exclude_unsupported_window_frame_from_row_number,
-        exclude_unsupported_window_frame_from_rank,
-        *SQLGlotCompiler.rewrites,
-    )
-
-    UNSUPPORTED_OPS = (
-        ops.DateDiff,
-        ops.ExtractAuthority,
-        ops.ExtractUserInfo,
-        ops.FindInSet,
-        ops.Median,
-        ops.Quantile,
-        ops.MultiQuantile,
-        ops.RegexSplit,
-        ops.RowID,
-        ops.TimestampBucket,
-        ops.TimestampDiff,
-    )
-
-    NAN = sge.Cast(
-        this=sge.convert("NaN"), to=sge.DataType(this=sge.DataType.Type.DOUBLE)
-    )
-    POS_INF = sge.Cast(
-        this=sge.convert("Infinity"), to=sge.DataType(this=sge.DataType.Type.DOUBLE)
-    )
-    NEG_INF = sge.Cast(
-        this=sge.convert("-Infinity"), to=sge.DataType(this=sge.DataType.Type.DOUBLE)
-    )
-
-    SIMPLE_OPS = {
-        ops.Arbitrary: "any_value",
-        ops.StringAscii: "ascii",
-        ops.BitAnd: "bit_and",
-        ops.BitOr: "bit_or",
-        ops.BitXor: "bit_xor",
-        ops.DateFromYMD: "date",
-        ops.Divide: "ieee_divide",
-        ops.EndsWith: "ends_with",
-        ops.GeoArea: "st_area",
-        ops.GeoAsBinary: "st_asbinary",
-        ops.GeoAsText: "st_astext",
-        ops.GeoAzimuth: "st_azimuth",
-        ops.GeoBuffer: "st_buffer",
-        ops.GeoCentroid: "st_centroid",
-        ops.GeoContains: "st_contains",
-        ops.GeoCoveredBy: "st_coveredby",
-        ops.GeoCovers: "st_covers",
-        ops.GeoDWithin: "st_dwithin",
-        ops.GeoDifference: "st_difference",
-        ops.GeoDisjoint: "st_disjoint",
-        ops.GeoDistance: "st_distance",
-        ops.GeoEndPoint: "st_endpoint",
-        ops.GeoEquals: "st_equals",
-        ops.GeoGeometryType: "st_geometrytype",
-        ops.GeoIntersection: "st_intersection",
-        ops.GeoIntersects: "st_intersects",
-        ops.GeoLength: "st_length",
-        ops.GeoMaxDistance: "st_maxdistance",
-        ops.GeoNPoints: "st_numpoints",
-        ops.GeoPerimeter: "st_perimeter",
-        ops.GeoPoint: "st_geogpoint",
-        ops.GeoPointN: "st_pointn",
-        ops.GeoStartPoint: "st_startpoint",
-        ops.GeoTouches: "st_touches",
-        ops.GeoUnaryUnion: "st_union_agg",
-        ops.GeoUnion: "st_union",
-        ops.GeoWithin: "st_within",
-        ops.GeoX: "st_x",
-        ops.GeoY: "st_y",
-        ops.Hash: "farm_fingerprint",
-        ops.IsInf: "is_inf",
-        ops.IsNan: "is_nan",
-        ops.Log10: "log10",
-        ops.LPad: "lpad",
-        ops.RPad: "rpad",
-        ops.Levenshtein: "edit_distance",
-        ops.Modulus: "mod",
-        ops.RegexReplace: "regexp_replace",
-        ops.RegexSearch: "regexp_contains",
-        ops.Time: "time",
-        ops.TimeFromHMS: "time_from_parts",
-        ops.TimestampNow: "current_timestamp",
-        ops.ExtractHost: "net.host",
-    }
-
-    @staticmethod
-    def _minimize_spec(start, end, spec):
-        if (
-            start is None
-            and isinstance(getattr(end, "value", None), ops.Literal)
-            and end.value.value == 0
-            and end.following
-        ):
-            return None
-        return spec
-
-    def visit_BoundingBox(self, op, *, arg):
-        name = type(op).__name__[len("Geo") :].lower()
-        return sge.Dot(
-            this=self.f.st_boundingbox(arg), expression=sg.to_identifier(name)
-        )
-
-    visit_GeoXMax = visit_GeoXMin = visit_GeoYMax = visit_GeoYMin = visit_BoundingBox
-
-    def visit_GeoSimplify(self, op, *, arg, tolerance, preserve_collapsed):
-        if (
-            not isinstance(op.preserve_collapsed, ops.Literal)
-            or op.preserve_collapsed.value
-        ):
-            raise com.UnsupportedOperationError(
-                "BigQuery simplify does not support preserving collapsed geometries, "
-                "pass preserve_collapsed=False"
-            )
-        return self.f.st_simplify(arg, tolerance)
-
-    def visit_ApproxMedian(self, op, *, arg, where):
-        return self.agg.approx_quantiles(arg, 2, where=where)[self.f.offset(1)]
-
-    def visit_Pi(self, op):
-        return self.f.acos(-1)
-
-    def visit_E(self, op):
-        return self.f.exp(1)
-
-    def visit_TimeDelta(self, op, *, left, right, part):
-        return self.f.time_diff(left, right, part, dialect=self.dialect)
-
-    def visit_DateDelta(self, op, *, left, right, part):
-        return self.f.date_diff(left, right, part, dialect=self.dialect)
-
-    def visit_TimestampDelta(self, op, *, left, right, part):
-        left_tz = op.left.dtype.timezone
-        right_tz = op.right.dtype.timezone
-
-        if left_tz is None and right_tz is None:
-            return self.f.datetime_diff(left, right, part)
-        elif left_tz is not None and right_tz is not None:
-            return self.f.timestamp_diff(left, right, part)
-
-        raise com.UnsupportedOperationError(
-            "timestamp difference with mixed timezone/timezoneless values is not implemented"
-        )
-
-    def visit_GroupConcat(self, op, *, arg, sep, where):
-        if where is not None:
-            arg = self.if_(where, arg, NULL)
-        return self.f.string_agg(arg, sep)
-
-    def visit_FloorDivide(self, op, *, left, right):
-        return self.cast(self.f.floor(self.f.ieee_divide(left, right)), op.dtype)
-
-    def visit_Log2(self, op, *, arg):
-        return self.f.log(arg, 2, dialect=self.dialect)
-
-    def visit_Log(self, op, *, arg, base):
-        if base is None:
-            return self.f.ln(arg)
-        return self.f.log(arg, base, dialect=self.dialect)
-
-    def visit_ArrayRepeat(self, op, *, arg, times):
-        start = step = 1
-        array_length = self.f.array_length(arg)
-        stop = self.f.greatest(times, 0) * array_length
-        i = sg.to_identifier("i")
-        idx = self.f.coalesce(
-            self.f.nullif(self.f.mod(i, array_length), 0), array_length
-        )
-        series = self.f.generate_array(start, stop, step)
-        return self.f.array(
-            sg.select(arg[self.f.safe_ordinal(idx)]).from_(self._unnest(series, as_=i))
-        )
-
-    def visit_NthValue(self, op, *, arg, nth):
-        if not isinstance(op.nth, ops.Literal):
-            raise com.UnsupportedOperationError(
-                f"BigQuery `nth` must be a literal; got {type(op.nth)}"
-            )
-        return self.f.nth_value(arg, nth)
-
-    def visit_StrRight(self, op, *, arg, nchars):
-        return self.f.substr(arg, -self.f.least(self.f.length(arg), nchars))
-
-    def visit_StringJoin(self, op, *, arg, sep):
-        return self.f.array_to_string(self.f.array(*arg), sep)
-
-    def visit_DayOfWeekIndex(self, op, *, arg):
-        return self.f.mod(self.f.extract(self.v.dayofweek, arg) + 5, 7)
-
-    def visit_DayOfWeekName(self, op, *, arg):
-        return self.f.initcap(sge.Cast(this=arg, to="STRING FORMAT 'DAY'"))
-
-    def visit_StringToTimestamp(self, op, *, arg, format_str):
-        if (timezone := op.dtype.timezone) is not None:
-            return self.f.parse_timestamp(format_str, arg, timezone)
-        return self.f.parse_datetime(format_str, arg)
-
-    def visit_ArrayCollect(self, op, *, arg, where):
-        if where is not None:
-            arg = self.if_(where, arg, NULL)
-        return self.f.array_agg(sge.IgnoreNulls(this=arg))
-
-    def _neg_idx_to_pos(self, arg, idx):
-        return self.if_(idx < 0, self.f.array_length(arg) + idx, idx)
-
-    def visit_ArraySlice(self, op, *, arg, start, stop):
-        index = sg.to_identifier("bq_arr_slice")
-        cond = [index >= self._neg_idx_to_pos(arg, start)]
-
-        if stop is not None:
-            cond.append(index < self._neg_idx_to_pos(arg, stop))
-
-        el = sg.to_identifier("el")
-        return self.f.array(
-            sg.select(el).from_(self._unnest(arg, as_=el, offset=index)).where(*cond)
-        )
-
-    def visit_ArrayIndex(self, op, *, arg, index):
-        return arg[self.f.safe_offset(index)]
-
-    def visit_ArrayContains(self, op, *, arg, other):
-        name = sg.to_identifier(util.gen_name("bq_arr_contains"))
-        return sge.Exists(
-            this=sg.select(sge.convert(1))
-            .from_(self._unnest(arg, as_=name))
-            .where(name.eq(other))
-        )
-
-    def visit_StringContains(self, op, *, haystack, needle):
-        return self.f.strpos(haystack, needle) > 0
-
-    def visti_StringFind(self, op, *, arg, substr, start, end):
-        if start is not None:
-            raise NotImplementedError(
-                "`start` not implemented for BigQuery string find"
-            )
-        if end is not None:
-            raise NotImplementedError("`end` not implemented for BigQuery string find")
-        return self.f.strpos(arg, substr)
-
-    def visit_TimestampFromYMDHMS(
-        self, op, *, year, month, day, hours, minutes, seconds
-    ):
-        return self.f.anon.DATETIME(year, month, day, hours, minutes, seconds)
-
-    def visit_NonNullLiteral(self, op, *, value, dtype):
-        if dtype.is_inet() or dtype.is_macaddr():
-            return sge.convert(str(value))
-        elif dtype.is_timestamp():
-            funcname = "DATETIME" if dtype.timezone is None else "TIMESTAMP"
-            return self.f.anon[funcname](value.isoformat())
-        elif dtype.is_date():
-            return self.f.date_from_parts(value.year, value.month, value.day)
-        elif dtype.is_time():
-            time = self.f.time_from_parts(value.hour, value.minute, value.second)
-            if micros := value.microsecond:
-                # bigquery doesn't support `time(12, 34, 56.789101)`, AKA a
-                # float seconds specifier, so add any non-zero micros to the
-                # time value
-                return sge.TimeAdd(
-                    this=time, expression=sge.convert(micros), unit=self.v.MICROSECOND
-                )
-            return time
-        elif dtype.is_binary():
-            return sge.Cast(
-                this=sge.convert(value.hex()),
-                to=sge.DataType(this=sge.DataType.Type.BINARY),
-                format=sge.convert("HEX"),
-            )
-        elif dtype.is_interval():
-            if dtype.unit == IntervalUnit.NANOSECOND:
-                raise com.UnsupportedOperationError(
-                    "BigQuery does not support nanosecond intervals"
-                )
-        elif dtype.is_uuid():
-            return sge.convert(str(value))
-        return None
-
-    def visit_IntervalFromInteger(self, op, *, arg, unit):
-        if unit == IntervalUnit.NANOSECOND:
-            raise com.UnsupportedOperationError(
-                "BigQuery does not support nanosecond intervals"
-            )
-        return sge.Interval(this=arg, unit=self.v[unit.singular])
-
-    def visit_Strftime(self, op, *, arg, format_str):
-        arg_dtype = op.arg.dtype
-        if arg_dtype.is_timestamp():
-            if (timezone := arg_dtype.timezone) is None:
-                return self.f.format_datetime(format_str, arg)
-            else:
-                return self.f.format_timestamp(format_str, arg, timezone)
-        elif arg_dtype.is_date():
-            return self.f.format_date(format_str, arg)
-        else:
-            assert arg_dtype.is_time(), arg_dtype
-            return self.f.format_time(format_str, arg)
-
-    def visit_IntervalMultiply(self, op, *, left, right):
-        unit = self.v[op.left.dtype.resolution.upper()]
-        return sge.Interval(this=self.f.extract(unit, left) * right, unit=unit)
-
-    def visit_TimestampFromUNIX(self, op, *, arg, unit):
-        unit = op.unit
-        if unit == TimestampUnit.SECOND:
-            return self.f.timestamp_seconds(arg)
-        elif unit == TimestampUnit.MILLISECOND:
-            return self.f.timestamp_millis(arg)
-        elif unit == TimestampUnit.MICROSECOND:
-            return self.f.timestamp_micros(arg)
-        elif unit == TimestampUnit.NANOSECOND:
-            return self.f.timestamp_micros(
-                self.cast(self.f.round(arg / 1_000), dt.int64)
-            )
-        else:
-            raise com.UnsupportedOperationError(f"Unit not supported: {unit}")
-
-    def visit_Cast(self, op, *, arg, to):
-        from_ = op.arg.dtype
-        if from_.is_timestamp() and to.is_integer():
-            return self.f.unix_micros(arg)
-        elif from_.is_integer() and to.is_timestamp():
-            return self.f.timestamp_seconds(arg)
-        elif from_.is_interval() and to.is_integer():
-            if from_.unit in {
-                IntervalUnit.WEEK,
-                IntervalUnit.QUARTER,
-                IntervalUnit.NANOSECOND,
-            }:
-                raise com.UnsupportedOperationError(
-                    f"BigQuery does not allow extracting date part `{from_.unit}` from intervals"
-                )
-            return self.f.extract(self.v[to.resolution.upper()], arg)
-        elif from_.is_integer() and to.is_interval():
-            return sge.Interval(this=arg, unit=self.v[to.unit.singular])
-        elif from_.is_floating() and to.is_integer():
-            return self.cast(self.f.trunc(arg), dt.int64)
-        return super().visit_Cast(op, arg=arg, to=to)
-
-    def visit_JSONGetItem(self, op, *, arg, index):
-        return arg[index]
-
-    def visit_UnwrapJSONString(self, op, *, arg):
-        return self.f.anon["safe.string"](arg)
-
-    def visit_UnwrapJSONInt64(self, op, *, arg):
-        return self.f.anon["safe.int64"](arg)
-
-    def visit_UnwrapJSONFloat64(self, op, *, arg):
-        return self.f.anon["safe.float64"](arg)
-
-    def visit_UnwrapJSONBoolean(self, op, *, arg):
-        return self.f.anon["safe.bool"](arg)
-
-    def visit_ExtractEpochSeconds(self, op, *, arg):
-        return self.f.unix_seconds(arg)
-
-    def visit_ExtractWeekOfYear(self, op, *, arg):
-        return self.f.extract(self.v.isoweek, arg)
-
-    def visit_ExtractIsoYear(self, op, *, arg):
-        return self.f.extract(self.v.isoyear, arg)
-
-    def visit_ExtractMillisecond(self, op, *, arg):
-        return self.f.extract(self.v.millisecond, arg)
-
-    def visit_ExtractMicrosecond(self, op, *, arg):
-        return self.f.extract(self.v.microsecond, arg)
-
-    def visit_TimestampTruncate(self, op, *, arg, unit):
-        if unit == IntervalUnit.NANOSECOND:
-            raise com.UnsupportedOperationError(
-                f"BigQuery does not support truncating {op.arg.dtype} values to unit {unit!r}"
-            )
-        elif unit == IntervalUnit.WEEK:
-            unit = "WEEK(MONDAY)"
-        else:
-            unit = unit.name
-        return self.f.timestamp_trunc(arg, self.v[unit], dialect=self.dialect)
-
-    def visit_DateTruncate(self, op, *, arg, unit):
-        if unit == DateUnit.WEEK:
-            unit = "WEEK(MONDAY)"
-        else:
-            unit = unit.name
-        return self.f.date_trunc(arg, self.v[unit], dialect=self.dialect)
-
-    def visit_TimeTruncate(self, op, *, arg, unit):
-        if unit == TimeUnit.NANOSECOND:
-            raise com.UnsupportedOperationError(
-                f"BigQuery does not support truncating {op.arg.dtype} values to unit {unit!r}"
-            )
-        else:
-            unit = unit.name
-        return self.f.time_trunc(arg, self.v[unit], dialect=self.dialect)
-
-    def _nullifzero(self, step, zero, step_dtype):
-        if step_dtype.is_interval():
-            return self.if_(step.eq(zero), NULL, step)
-        return self.f.nullif(step, zero)
-
-    def _zero(self, dtype):
-        if dtype.is_interval():
-            return self.f.make_interval()
-        return sge.convert(0)
-
-    def _sign(self, value, dtype):
-        if dtype.is_interval():
-            zero = self._zero(dtype)
-            return sge.Case(
-                ifs=[
-                    self.if_(value < zero, -1),
-                    self.if_(value.eq(zero), 0),
-                    self.if_(value > zero, 1),
-                ],
-                default=NULL,
-            )
-        return self.f.sign(value)
-
-    def _make_range(self, func, start, stop, step, step_dtype):
-        step_sign = self._sign(step, step_dtype)
-        delta_sign = self._sign(stop - start, step_dtype)
-        zero = self._zero(step_dtype)
-        nullifzero = self._nullifzero(step, zero, step_dtype)
-        condition = sg.and_(sg.not_(nullifzero.is_(NULL)), step_sign.eq(delta_sign))
-        gen_array = func(start, stop, step)
-        name = sg.to_identifier(util.gen_name("bq_arr_range"))
-        inner = (
-            sg.select(name)
-            .from_(self._unnest(gen_array, as_=name))
-            .where(name.neq(stop))
-        )
-        return self.if_(condition, self.f.array(inner), self.f.array())
-
-    def visit_IntegerRange(self, op, *, start, stop, step):
-        return self._make_range(self.f.generate_array, start, stop, step, op.step.dtype)
-
-    def visit_TimestampRange(self, op, *, start, stop, step):
-        if op.start.dtype.timezone is None or op.stop.dtype.timezone is None:
-            raise com.IbisTypeError(
-                "Timestamps without timezone values are not supported when generating timestamp ranges"
-            )
-        return self._make_range(
-            self.f.generate_timestamp_array, start, stop, step, op.step.dtype
-        )
-
-    def visit_First(self, op, *, arg, where):
-        if where is not None:
-            arg = self.if_(where, arg, NULL)
-        array = self.f.array_agg(
-            sge.Limit(this=sge.IgnoreNulls(this=arg), expression=sge.convert(1)),
-        )
-        return array[self.f.safe_offset(0)]
-
-    def visit_Last(self, op, *, arg, where):
-        if where is not None:
-            arg = self.if_(where, arg, NULL)
-        array = self.f.array_reverse(self.f.array_agg(sge.IgnoreNulls(this=arg)))
-        return array[self.f.safe_offset(0)]
-
-    def visit_ArrayFilter(self, op, *, arg, body, param):
-        return self.f.array(
-            sg.select(param).from_(self._unnest(arg, as_=param)).where(body)
-        )
-
-    def visit_ArrayMap(self, op, *, arg, body, param):
-        return self.f.array(sg.select(body).from_(self._unnest(arg, as_=param)))
-
-    def visit_ArrayZip(self, op, *, arg):
-        lengths = [self.f.array_length(arr) - 1 for arr in arg]
-        idx = sg.to_identifier(util.gen_name("bq_arr_idx"))
-        indices = self._unnest(
-            self.f.generate_array(0, self.f.greatest(*lengths)), as_=idx
-        )
-        struct_fields = [
-            arr[self.f.safe_offset(idx)].as_(name)
-            for name, arr in zip(op.dtype.value_type.names, arg)
-        ]
-        return self.f.array(
-            sge.Select(kind="STRUCT", expressions=struct_fields).from_(indices)
-        )
-
-    def visit_ArrayPosition(self, op, *, arg, other):
-        name = sg.to_identifier(util.gen_name("bq_arr"))
-        idx = sg.to_identifier(util.gen_name("bq_arr_idx"))
-        unnest = self._unnest(arg, as_=name, offset=idx)
-        return self.f.coalesce(
-            sg.select(idx + 1).from_(unnest).where(name.eq(other)).limit(1).subquery(),
-            0,
-        )
-
-    def _unnest(self, expression, *, as_, offset=None):
-        alias = sge.TableAlias(columns=[sg.to_identifier(as_)])
-        return sge.Unnest(expressions=[expression], alias=alias, offset=offset)
-
-    def visit_ArrayRemove(self, op, *, arg, other):
-        name = sg.to_identifier(util.gen_name("bq_arr"))
-        unnest = self._unnest(arg, as_=name)
-        return self.f.array(sg.select(name).from_(unnest).where(name.neq(other)))
-
-    def visit_ArrayDistinct(self, op, *, arg):
-        name = util.gen_name("bq_arr")
-        return self.f.array(
-            sg.select(name).distinct().from_(self._unnest(arg, as_=name))
-        )
-
-    def visit_ArraySort(self, op, *, arg):
-        name = util.gen_name("bq_arr")
-        return self.f.array(
-            sg.select(name).from_(self._unnest(arg, as_=name)).order_by(name)
-        )
-
-    def visit_ArrayUnion(self, op, *, left, right):
-        lname = util.gen_name("bq_arr_left")
-        rname = util.gen_name("bq_arr_right")
-        lhs = sg.select(lname).from_(self._unnest(left, as_=lname))
-        rhs = sg.select(rname).from_(self._unnest(right, as_=rname))
-        return self.f.array(sg.union(lhs, rhs, distinct=True))
-
-    def visit_ArrayIntersect(self, op, *, left, right):
-        lname = util.gen_name("bq_arr_left")
-        rname = util.gen_name("bq_arr_right")
-        lhs = sg.select(lname).from_(self._unnest(left, as_=lname))
-        rhs = sg.select(rname).from_(self._unnest(right, as_=rname))
-        return self.f.array(sg.intersect(lhs, rhs, distinct=True))
-
-    def visit_RegexExtract(self, op, *, arg, pattern, index):
-        matches = self.f.regexp_contains(arg, pattern)
-        nonzero_index_replace = self.f.regexp_replace(
-            arg,
-            self.f.concat(".*?", pattern, ".*"),
-            self.f.concat("\\", self.cast(index, dt.string)),
-        )
-        zero_index_replace = self.f.regexp_replace(
-            arg, self.f.concat(".*?", self.f.concat("(", pattern, ")"), ".*"), "\\1"
-        )
-        extract = self.if_(index.eq(0), zero_index_replace, nonzero_index_replace)
-        return self.if_(matches, extract, NULL)
-
-    def visit_TimestampAddSub(self, op, *, left, right):
-        if not isinstance(right, sge.Interval):
-            raise com.OperationNotDefinedError(
-                "BigQuery does not support non-literals on the right side of timestamp add/subtract"
-            )
-        if (unit := op.right.dtype.unit) == IntervalUnit.NANOSECOND:
-            raise com.UnsupportedOperationError(
-                f"BigQuery does not allow binary operation {type(op).__name__} with "
-                f"INTERVAL offset {unit}"
-            )
-
-        opname = type(op).__name__[len("Timestamp") :]
-        funcname = f"TIMESTAMP_{opname.upper()}"
-        return self.f.anon[funcname](left, right)
-
-    visit_TimestampAdd = visit_TimestampSub = visit_TimestampAddSub
-
-    def visit_DateAddSub(self, op, *, left, right):
-        if not isinstance(right, sge.Interval):
-            raise com.OperationNotDefinedError(
-                "BigQuery does not support non-literals on the right side of date add/subtract"
-            )
-        if not (unit := op.right.dtype.unit).is_date():
-            raise com.UnsupportedOperationError(
-                f"BigQuery does not allow binary operation {type(op).__name__} with "
-                f"INTERVAL offset {unit}"
-            )
-        opname = type(op).__name__[len("Date") :]
-        funcname = f"DATE_{opname.upper()}"
-        return self.f.anon[funcname](left, right)
-
-    visit_DateAdd = visit_DateSub = visit_DateAddSub
-
-    def visit_Covariance(self, op, *, left, right, how, where):
-        if where is not None:
-            left = self.if_(where, left, NULL)
-            right = self.if_(where, right, NULL)
-
-        if op.left.dtype.is_boolean():
-            left = self.cast(left, dt.int64)
-
-        if op.right.dtype.is_boolean():
-            right = self.cast(right, dt.int64)
-
-        how = op.how[:4].upper()
-        assert how in ("POP", "SAMP"), 'how not in ("POP", "SAMP")'
-        return self.agg[f"COVAR_{how}"](left, right, where=where)
-
-    def visit_Correlation(self, op, *, left, right, how, where):
-        if how == "sample":
-            raise ValueError(f"Correlation with how={how!r} is not supported.")
-
-        if where is not None:
-            left = self.if_(where, left, NULL)
-            right = self.if_(where, right, NULL)
-
-        if op.left.dtype.is_boolean():
-            left = self.cast(left, dt.int64)
-
-        if op.right.dtype.is_boolean():
-            right = self.cast(right, dt.int64)
-
-        return self.agg.corr(left, right, where=where)
-
-    def visit_TypeOf(self, op, *, arg):
-        return self._pudf("typeof", arg)
-
-    def visit_Xor(self, op, *, left, right):
-        return sg.or_(sg.and_(left, sg.not_(right)), sg.and_(sg.not_(left), right))
-
-    def visit_HashBytes(self, op, *, arg, how):
-        if how not in ("md5", "sha1", "sha256", "sha512"):
-            raise NotImplementedError(how)
-        return self.f[how](arg)
-
-    @staticmethod
-    def _gen_valid_name(name: str) -> str:
-        return "_".join(map(str.strip, _NAME_REGEX.findall(name))) or "tmp"
-
-    def visit_CountStar(self, op, *, arg, where):
-        if where is not None:
-            return self.f.countif(where)
-        return self.f.count(STAR)
-
-    def visit_CountDistinctStar(self, op, *, where, arg):
-        # Bigquery does not support count(distinct a,b,c) or count(distinct (a, b, c))
-        # as expressions must be "groupable":
-        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#group_by_grouping_item
-        #
-        # Instead, convert the entire expression to a string
-        # SELECT COUNT(DISTINCT concat(to_json_string(a), to_json_string(b)))
-        # This works with an array of datatypes which generates a unique string
-        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_encodings
-        row = sge.Concat(
-            expressions=[
-                self.f.to_json_string(sg.column(x, quoted=self.quoted))
-                for x in op.arg.schema.keys()
-            ]
-        )
-        if where is not None:
-            row = self.if_(where, row, NULL)
-        return self.f.count(sge.Distinct(expressions=[row]))
-
-    def visit_Degrees(self, op, *, arg):
-        return self._pudf("degrees", arg)
-
-    def visit_Radians(self, op, *, arg):
-        return self._pudf("radians", arg)
-
-    def visit_CountDistinct(self, op, *, arg, where):
-        if where is not None:
-            arg = self.if_(where, arg, NULL)
-        return self.f.count(sge.Distinct(expressions=[arg]))
-
-    def visit_RandomUUID(self, op, **kwargs):
-        return self.f.generate_uuid()
-
-    def visit_ExtractFile(self, op, *, arg):
-        return self._pudf("cw_url_extract_file", arg)
-
-    def visit_ExtractFragment(self, op, *, arg):
-        return self._pudf("cw_url_extract_fragment", arg)
-
-    def visit_ExtractPath(self, op, *, arg):
-        return self._pudf("cw_url_extract_path", arg)
-
-    def visit_ExtractProtocol(self, op, *, arg):
-        return self._pudf("cw_url_extract_protocol", arg)
-
-    def visit_ExtractQuery(self, op, *, arg, key):
-        if key is not None:
-            return self._pudf("cw_url_extract_parameter", arg, key)
-        else:
-            return self._pudf("cw_url_extract_query", arg)
-
-    def _pudf(self, name, *args):
-        name = sg.table(name, db="persistent_udfs", catalog="bigquery-public-data").sql(
-            self.dialect
-        )
-        return self.f[name](*args)
-
-    def visit_DropColumns(self, op, *, parent, columns_to_drop):
-        quoted = self.quoted
-        excludes = [sg.column(column, quoted=quoted) for column in columns_to_drop]
-        star = sge.Star(**{"except": excludes})
-        table = sg.to_identifier(parent.alias_or_name, quoted=quoted)
-        column = sge.Column(this=star, table=table)
-        return sg.select(column).from_(parent)
-
-    def visit_TableUnnest(
-        self, op, *, parent, column, offset: str | None, keep_empty: bool
-    ):
-        quoted = self.quoted
-
-        column_alias = sg.to_identifier(
-            util.gen_name("table_unnest_column"), quoted=quoted
-        )
-
-        selcols = []
-
-        table = sg.to_identifier(parent.alias_or_name, quoted=quoted)
-
-        opname = op.column.name
-        overlaps_with_parent = opname in op.parent.schema
-        computed_column = column_alias.as_(opname, quoted=quoted)
-
-        # replace the existing column if the unnested column hasn't been
-        # renamed
-        #
-        # e.g., table.unnest("x")
-        if overlaps_with_parent:
-            selcols.append(
-                sge.Column(this=sge.Star(replace=[computed_column]), table=table)
-            )
-        else:
-            selcols.append(sge.Column(this=STAR, table=table))
-            selcols.append(computed_column)
-
-        if offset is not None:
-            offset = sg.to_identifier(offset, quoted=quoted)
-            selcols.append(offset)
-
-        unnest = sge.Unnest(
-            expressions=[column],
-            alias=sge.TableAlias(columns=[column_alias]),
-            offset=offset,
-        )
-        return (
-            sg.select(*selcols)
-            .from_(parent)
-            .join(unnest, join_type="CROSS" if not keep_empty else "LEFT")
-        )
diff --git a/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py b/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py
index 9de3e09540..3793a09229 100644
--- a/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py
+++ b/third_party/bigframes_vendored/ibis/backends/sql/compilers/bigquery/__init__.py
@@ -252,17 +252,6 @@ def to_sqlglot(
         sources.append(result)
         return sources
 
-    @staticmethod
-    def _minimize_spec(start, end, spec):
-        if (
-            start is None
-            and isinstance(getattr(end, "value", None), ops.Literal)
-            and end.value.value == 0
-            and end.following
-        ):
-            return None
-        return spec
-
     def visit_BoundingBox(self, op, *, arg):
         name = type(op).__name__[len("Geo") :].lower()
         return sge.Dot(
@@ -1105,27 +1094,26 @@ def visit_Quantile(self, op, *, arg, quantile, where):
 
     def visit_WindowFunction(self, op, *, how, func, start, end, group_by, order_by):
         # Patch for https://github.com/ibis-project/ibis/issues/9872
-        if start is None and end is None:
-            spec = None
-        else:
-            if start is None:
-                start = {}
-            if end is None:
-                end = {}
 
-            start_value = start.get("value", "UNBOUNDED")
-            start_side = start.get("side", "PRECEDING")
-            end_value = end.get("value", "UNBOUNDED")
-            end_side = end.get("side", "FOLLOWING")
+        if start is None:
+            start = {}
+        if end is None:
+            end = {}
 
-            if getattr(start_value, "this", None) == "0":
-                start_value = "CURRENT ROW"
-                start_side = None
+        start_value = start.get("value", "UNBOUNDED")
+        start_side = start.get("side", "PRECEDING")
+        end_value = end.get("value", "UNBOUNDED")
+        end_side = end.get("side", "FOLLOWING")
 
-            if getattr(end_value, "this", None) == "0":
-                end_value = "CURRENT ROW"
-                end_side = None
+        if getattr(start_value, "this", None) == "0":
+            start_value = "CURRENT ROW"
+            start_side = None
 
+        if getattr(end_value, "this", None) == "0":
+            end_value = "CURRENT ROW"
+            end_side = None
+
+        if how != "none":
             spec = sge.WindowSpec(
                 kind=how.upper(),
                 start=start_value,
@@ -1134,7 +1122,12 @@ def visit_WindowFunction(self, op, *, how, func, start, end, group_by, order_by)
                 end_side=end_side,
                 over="OVER",
             )
-            spec = self._minimize_spec(op.start, op.end, spec)
+        else:
+            spec = None
+
+        # If unordered, an unbounded range window is implicit
+        if (not order_by) and (not start) and (not end):
+            spec = None
 
         order = sge.Order(expressions=order_by) if order_by else None
 
diff --git a/third_party/bigframes_vendored/ibis/backends/sql/rewrites.py b/third_party/bigframes_vendored/ibis/backends/sql/rewrites.py
index 65119aa40a..b2ef6a15d3 100644
--- a/third_party/bigframes_vendored/ibis/backends/sql/rewrites.py
+++ b/third_party/bigframes_vendored/ibis/backends/sql/rewrites.py
@@ -400,25 +400,26 @@ def rewrite_empty_order_by_window(_, **kwargs):
     return _.copy(order_by=(ops.NULL,))
 
 
-@replace(p.WindowFunction(p.RowNumber | p.NTile))
+@replace(p.WindowFunction(p.RowNumber | p.NTile | p.MinRank | p.DenseRank))
 def exclude_unsupported_window_frame_from_row_number(_, **kwargs):
-    return ops.Subtract(_.copy(start=None, end=0), 1)
+    # These functions do not support window bounds, only an ordering.
+    # Also, it's kind of messy to insert subtract here, should probably be in visitor
+    return ops.Subtract(
+        _.copy(how="none", start=None, end=None, order_by=_.order_by or (ops.NULL,)), 1
+    )
 
 
-@replace(p.WindowFunction(p.MinRank | p.DenseRank, start=None))
+@replace(p.WindowFunction(p.PercentRank | p.CumeDist, start=None))
 def exclude_unsupported_window_frame_from_rank(_, **kwargs):
-    return ops.Subtract(
-        _.copy(start=None, end=0, order_by=_.order_by or (ops.NULL,)), 1
-    )
+    # These functions do not support window bounds, only an ordering.
+    # Unlike the row_number/rank rewrite, no subtract is inserted for these functions.
+    return _.copy(how="none", start=None, end=None, order_by=_.order_by or (ops.NULL,))
 
 
-@replace(
-    p.WindowFunction(
-        p.Lag | p.Lead | p.PercentRank | p.CumeDist | p.Any | p.All, start=None
-    )
-)
+@replace(p.WindowFunction(p.Lag | p.Lead, start=None))
 def exclude_unsupported_window_frame_from_ops(_, **kwargs):
-    return _.copy(start=None, end=0, order_by=_.order_by or (ops.NULL,))
+    # lag/lead don't support bounds, but do support ordering
+    return _.copy(how="none", start=None, end=None, order_by=_.order_by or (ops.NULL,))
 
 
 # Rewrite rules for lowering a high-level operation into one composed of more
diff --git a/third_party/bigframes_vendored/ibis/expr/operations/window.py b/third_party/bigframes_vendored/ibis/expr/operations/window.py
index 0c9ae91fc7..0fcecb4109 100644
--- a/third_party/bigframes_vendored/ibis/expr/operations/window.py
+++ b/third_party/bigframes_vendored/ibis/expr/operations/window.py
@@ -69,7 +69,8 @@ class WindowFunction(Value):
     """Window function operation."""
 
     func: Analytic | Reduction
-    how: LiteralType["rows", "range"] = "rows"  # noqa: F821
+    # "none" is a hacky way to express that window bounds are not supported (e.g. row_number())
+    how: LiteralType["rows", "range", "none"] = "rows"  # noqa: F821
     start: Optional[WindowBoundary[dt.Numeric | dt.Interval]] = None
     end: Optional[WindowBoundary[dt.Numeric | dt.Interval]] = None
     group_by: VarTuple[Column] = ()
@@ -100,7 +101,7 @@ def __init__(self, how, start, end, **kwargs):
                 raise com.IbisTypeError(
                     "Window frame start and end boundaries must have the same datatype"
                 )
-        else:
+        elif how != "none":
             raise com.IbisTypeError(
                 f"Window frame type must be either 'rows' or 'range', got {how}"
             )

From 788f6e94a1e80f0ba8741a53a05a467e7b18e902 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Mon, 13 Jan 2025 17:05:01 -0600
Subject: [PATCH 19/22] fix: avoid global mutation in
 `BigQueryOptions.client_endpoints_override` (#1280)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: avoid global mutation in `BigQueryOptions.client_endpoints_override`

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
---
 bigframes/_config/bigquery_options.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py
index 052ad5d921..0048bfd1ad 100644
--- a/bigframes/_config/bigquery_options.py
+++ b/bigframes/_config/bigquery_options.py
@@ -91,7 +91,7 @@ def __init__(
         skip_bq_connection_check: bool = False,
         *,
         ordering_mode: Literal["strict", "partial"] = "strict",
-        client_endpoints_override: dict = {},
+        client_endpoints_override: Optional[dict] = None,
     ):
         self._credentials = credentials
         self._project = project
@@ -104,6 +104,10 @@ def __init__(
         self._session_started = False
         # Determines the ordering strictness for the session.
         self._ordering_mode = _validate_ordering_mode(ordering_mode)
+
+        if client_endpoints_override is None:
+            client_endpoints_override = {}
+
         self._client_endpoints_override = client_endpoints_override
 
     @property

From 6903f710086037d8ec95e74f76046dd72c629ef1 Mon Sep 17 00:00:00 2001
From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com>
Date: Mon, 13 Jan 2025 17:07:24 -0600
Subject: [PATCH 20/22] chore(python): exclude .github/workflows/unittest.yml
 in renovate config (#1277)

Source-Link: https://github.com/googleapis/synthtool/commit/106d292bd234e5d9977231dcfbc4831e34eba13a
Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
---
 .github/.OwlBot.lock.yaml            |  6 ++--
 .github/workflows/unittest.yml       |  5 ++-
 .kokoro/docker/docs/requirements.txt | 52 ++++++++++++++++++++++------
 renovate.json                        |  2 +-
 4 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml
index 6301519a9a..10cf433a8b 100644
--- a/.github/.OwlBot.lock.yaml
+++ b/.github/.OwlBot.lock.yaml
@@ -1,4 +1,4 @@
-# Copyright 2024 Google LLC
+# Copyright 2025 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,5 +13,5 @@
 # limitations under the License.
 docker:
   image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
-  digest: sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562
-# created: 2024-11-12T12:09:45.821174897Z
+  digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a
+# created: 2025-01-09T12:01:16.422459506Z
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index ce5137e58c..8659d83d82 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -5,7 +5,10 @@ on:
 name: unittest
 jobs:
   unit:
-    runs-on: ubuntu-latest
+    # TODO(https://github.com/googleapis/gapic-generator-python/issues/2303): use `ubuntu-latest` once this bug is fixed.
+    # Use ubuntu-22.04 until Python 3.7 is removed from the test matrix
+    # https://docs.github.com/en/actions/using-github-hosted-runners/using-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
         python: ['3.9', '3.10', '3.11', '3.12']
diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt
index 8bb0764594..f99a5c4aac 100644
--- a/.kokoro/docker/docs/requirements.txt
+++ b/.kokoro/docker/docs/requirements.txt
@@ -2,11 +2,11 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile --allow-unsafe --generate-hashes requirements.in
+#    pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in
 #
-argcomplete==3.5.1 \
-    --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \
-    --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4
+argcomplete==3.5.2 \
+    --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \
+    --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb
     # via nox
 colorlog==6.9.0 \
     --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \
@@ -23,7 +23,7 @@ filelock==3.16.1 \
 nox==2024.10.9 \
     --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \
     --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95
-    # via -r requirements.in
+    # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in
 packaging==24.2 \
     --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
     --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
@@ -32,11 +32,41 @@ platformdirs==4.3.6 \
     --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \
     --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb
     # via virtualenv
-tomli==2.0.2 \
-    --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \
-    --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed
+tomli==2.2.1 \
+    --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \
+    --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \
+    --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \
+    --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \
+    --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \
+    --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \
+    --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \
+    --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \
+    --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \
+    --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \
+    --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \
+    --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \
+    --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \
+    --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \
+    --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \
+    --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \
+    --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \
+    --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \
+    --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \
+    --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \
+    --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \
+    --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \
+    --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \
+    --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \
+    --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \
+    --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \
+    --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \
+    --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \
+    --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \
+    --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \
+    --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \
+    --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7
     # via nox
-virtualenv==20.27.1 \
-    --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \
-    --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4
+virtualenv==20.28.0 \
+    --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \
+    --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa
     # via nox
diff --git a/renovate.json b/renovate.json
index 39b2a0ec92..c7875c469b 100644
--- a/renovate.json
+++ b/renovate.json
@@ -5,7 +5,7 @@
     ":preserveSemverRanges",
     ":disableDependencyDashboard"
   ],
-  "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py"],
+  "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"],
   "pip_requirements": {
     "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"]
   }

From f1a5629ae62da675c3065eb39dba540254f08931 Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Mon, 13 Jan 2025 15:08:48 -0800
Subject: [PATCH 21/22] chore: format warning message (#1281)

* chore: format warning message

* fix unit tests

* fix mypy

* use regional endpoints warning
---
 bigframes/_config/bigquery_options.py         | 27 +++---
 bigframes/_config/experiment_options.py       | 14 ++-
 bigframes/core/__init__.py                    | 26 +++---
 bigframes/core/blocks.py                      | 18 ++--
 bigframes/core/compile/ibis_types.py          |  8 +-
 bigframes/core/compile/polars/__init__.py     |  3 +-
 bigframes/core/global_session.py              |  7 +-
 bigframes/core/indexers.py                    | 11 +--
 bigframes/core/utils.py                       |  9 +-
 bigframes/dataframe.py                        | 14 ++-
 .../functions/_remote_function_session.py     |  9 +-
 bigframes/functions/remote_function.py        |  9 +-
 bigframes/ml/base.py                          | 12 +--
 bigframes/ml/llm.py                           | 74 ++++++++-------
 bigframes/ml/remote.py                        |  7 +-
 bigframes/operations/_matplotlib/core.py      |  7 +-
 bigframes/operations/semantics.py             | 15 ++--
 bigframes/session/__init__.py                 | 49 +++++-----
 .../session/_io/bigquery/read_gbq_table.py    | 59 ++++++------
 bigframes/session/executor.py                 |  3 +-
 bigframes/streaming/dataframe.py              | 15 ++--
 .../generative_ai/large_language_models.ipynb | 89 ++++++++++---------
 notebooks/streaming/streaming_dataframe.ipynb | 63 ++++++-------
 noxfile.py                                    |  4 +-
 tests/unit/_config/test_experiment_options.py |  5 +-
 25 files changed, 276 insertions(+), 281 deletions(-)

diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py
index 0048bfd1ad..8fec253b24 100644
--- a/bigframes/_config/bigquery_options.py
+++ b/bigframes/_config/bigquery_options.py
@@ -25,7 +25,7 @@
 
 import bigframes.constants
 import bigframes.enums
-import bigframes.exceptions
+import bigframes.exceptions as bfe
 
 SESSION_STARTED_MESSAGE = (
     "Cannot change '{attribute}' once a session has started. "
@@ -55,15 +55,12 @@ def _get_validated_location(value: Optional[str]) -> Optional[str]:
         bigframes.constants.ALL_BIGQUERY_LOCATIONS,
         key=lambda item: jellyfish.levenshtein_distance(location, item),
     )
-    warnings.warn(
-        UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility),
-        # There are many layers before we get to (possibly) the user's code:
-        # -> bpd.options.bigquery.location = "us-central-1"
-        # -> location.setter
-        # -> _get_validated_location
-        stacklevel=3,
-        category=bigframes.exceptions.UnknownLocationWarning,
-    )
+    # There are many layers before we get to (possibly) the user's code:
+    # -> bpd.options.bigquery.location = "us-central-1"
+    # -> location.setter
+    # -> _get_validated_location
+    msg = UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility)
+    warnings.warn(msg, stacklevel=3, category=bfe.UnknownLocationWarning)
 
     return value
 
@@ -275,10 +272,11 @@ def use_regional_endpoints(self, value: bool):
             )
 
         if value:
-            warnings.warn(
+            msg = (
                 "Use of regional endpoints is a feature in preview and "
                 "available only in selected regions and projects. "
             )
+            warnings.warn(msg, category=bfe.PreviewWarning, stacklevel=2)
 
         self._use_regional_endpoints = value
 
@@ -334,9 +332,12 @@ def client_endpoints_override(self) -> dict:
 
     @client_endpoints_override.setter
     def client_endpoints_override(self, value: dict):
-        warnings.warn(
-            "This is an advanced configuration option for directly setting endpoints. Incorrect use may lead to unexpected behavior or system instability. Proceed only if you fully understand its implications."
+        msg = (
+            "This is an advanced configuration option for directly setting endpoints. "
+            "Incorrect use may lead to unexpected behavior or system instability. "
+            "Proceed only if you fully understand its implications."
         )
+        warnings.warn(msg)
 
         if self._session_started and self._client_endpoints_override != value:
             raise ValueError(
diff --git a/bigframes/_config/experiment_options.py b/bigframes/_config/experiment_options.py
index 6b79dcf748..69273aef1c 100644
--- a/bigframes/_config/experiment_options.py
+++ b/bigframes/_config/experiment_options.py
@@ -14,6 +14,8 @@
 
 import warnings
 
+import bigframes.exceptions as bfe
+
 
 class ExperimentOptions:
     """
@@ -31,9 +33,11 @@ def semantic_operators(self) -> bool:
     @semantic_operators.setter
     def semantic_operators(self, value: bool):
         if value is True:
-            warnings.warn(
-                "Semantic operators are still under experiments, and are subject to change in the future."
+            msg = (
+                "Semantic operators are still under experiments, and are subject "
+                "to change in the future."
             )
+            warnings.warn(msg, category=bfe.PreviewWarning)
         self._semantic_operators = value
 
     @property
@@ -43,7 +47,9 @@ def blob(self) -> bool:
     @blob.setter
     def blob(self, value: bool):
         if value is True:
-            warnings.warn(
-                "BigFrames Blob is still under experiments. It may not work and subject to change in the future."
+            msg = (
+                "BigFrames Blob is still under experiments. It may not work and "
+                "subject to change in the future."
             )
+            warnings.warn(msg, category=bfe.PreviewWarning)
         self._blob = value
diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py
index 5e3f6df355..a88e365dcd 100644
--- a/bigframes/core/__init__.py
+++ b/bigframes/core/__init__.py
@@ -39,6 +39,7 @@
 import bigframes.core.utils
 from bigframes.core.window_spec import WindowSpec
 import bigframes.dtypes
+import bigframes.exceptions as bfe
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
 
@@ -106,10 +107,11 @@ def from_table(
         if offsets_col and primary_key:
             raise ValueError("must set at most one of 'offests', 'primary_key'")
         if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names):
-            warnings.warn(
-                "Interpreting JSON column(s) as StringDtype and pyarrow.large_string. This behavior may change in future versions.",
-                bigframes.exceptions.PreviewWarning,
+            msg = (
+                "Interpreting JSON column(s) as pyarrow.large_string. "
+                "This behavior may change in future versions."
             )
+            warnings.warn(msg, bfe.PreviewWarning)
         # define data source only for needed columns, this makes row-hashing cheaper
         table_def = nodes.GbqTable.from_table(table, columns=schema.names)
 
@@ -228,10 +230,8 @@ def slice(
         self, start: Optional[int], stop: Optional[int], step: Optional[int]
     ) -> ArrayValue:
         if self.node.order_ambiguous and not (self.session._strictly_ordered):
-            warnings.warn(
-                "Window ordering may be ambiguous, this can cause unstable results.",
-                bigframes.exceptions.AmbiguousWindowWarning,
-            )
+            msg = "Window ordering may be ambiguous, this can cause unstable results."
+            warnings.warn(msg, bfe.AmbiguousWindowWarning)
         return ArrayValue(
             nodes.SliceNode(
                 self.node,
@@ -252,10 +252,10 @@ def promote_offsets(self) -> Tuple[ArrayValue, str]:
                     "Generating offsets not supported in partial ordering mode"
                 )
             else:
-                warnings.warn(
-                    "Window ordering may be ambiguous, this can cause unstable results.",
-                    bigframes.exceptions.AmbiguousWindowWarning,
+                msg = (
+                    "Window ordering may be ambiguous, this can cause unstable results."
                 )
+                warnings.warn(msg, category=bfe.AmbiguousWindowWarning)
 
         return (
             ArrayValue(
@@ -391,10 +391,8 @@ def project_window_op(
                         "Generating offsets not supported in partial ordering mode"
                     )
                 else:
-                    warnings.warn(
-                        "Window ordering may be ambiguous, this can cause unstable results.",
-                        bigframes.exceptions.AmbiguousWindowWarning,
-                    )
+                    msg = "Window ordering may be ambiguous, this can cause unstable results."
+                    warnings.warn(msg, category=bfe.AmbiguousWindowWarning)
 
         output_name = self._gen_namespaced_uid()
         return (
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index ca860612f8..522d1743ff 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -63,7 +63,7 @@
 import bigframes.core.utils as utils
 import bigframes.core.window_spec as windows
 import bigframes.dtypes
-import bigframes.exceptions
+import bigframes.exceptions as bfe
 import bigframes.features
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
@@ -137,10 +137,8 @@ def __init__(
                 )
 
         if len(index_columns) == 0:
-            warnings.warn(
-                "Creating object with Null Index. Null Index is a preview feature.",
-                category=bigframes.exceptions.NullIndexPreviewWarning,
-            )
+            msg = "Creating object with Null Index. Null Index is a preview feature."
+            warnings.warn(msg, category=bfe.NullIndexPreviewWarning)
         self._index_columns = tuple(index_columns)
         # Index labels don't need complicated hierarchical access so can store as tuple
         self._index_labels = (
@@ -616,13 +614,13 @@ def _materialize_local(
                     " # Setting it to None will download all the data\n"
                     f"{constants.FEEDBACK_LINK}"
                 )
-
-            warnings.warn(
+            msg = (
                 f"The data size ({table_mb:.2f} MB) exceeds the maximum download limit of"
-                f"({max_download_size} MB). It will be downsampled to {max_download_size} MB for download."
-                "\nPlease refer to the documentation for configuring the downloading limit.",
-                UserWarning,
+                f" ({max_download_size} MB). It will be downsampled to {max_download_size} "
+                "MB for download.\nPlease refer to the documentation for configuring "
+                "the downloading limit."
             )
+            warnings.warn(msg, category=UserWarning)
             total_rows = execute_result.total_rows
             # Remove downsampling config from subsequent invocations, as otherwise could result in many
             # iterations if downsampling undershoots
diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py
index 544af69091..a6d3949bc0 100644
--- a/bigframes/core/compile/ibis_types.py
+++ b/bigframes/core/compile/ibis_types.py
@@ -32,6 +32,7 @@
 import pyarrow as pa
 
 import bigframes.dtypes
+import bigframes.exceptions as bfe
 
 # Type hints for Ibis data types supported by BigQuery DataFrame
 IbisDtype = Union[
@@ -305,10 +306,11 @@ def ibis_dtype_to_bigframes_dtype(
 
     # Temporary: Will eventually support an explicit json type instead of casting to string.
     if isinstance(ibis_dtype, ibis_dtypes.JSON):
-        warnings.warn(
-            "Interpreting JSON as string. This behavior may change in future versions.",
-            bigframes.exceptions.PreviewWarning,
+        msg = (
+            "Interpreting JSON column(s) as pyarrow.large_string. This behavior may change "
+            "in future versions."
         )
+        warnings.warn(msg, category=bfe.PreviewWarning)
         return bigframes.dtypes.JSON_DTYPE
 
     if ibis_dtype in IBIS_TO_BIGFRAMES:
diff --git a/bigframes/core/compile/polars/__init__.py b/bigframes/core/compile/polars/__init__.py
index e15f229faf..8c37e046ab 100644
--- a/bigframes/core/compile/polars/__init__.py
+++ b/bigframes/core/compile/polars/__init__.py
@@ -22,4 +22,5 @@
 
     __all__ = ["PolarsCompiler"]
 except Exception:
-    warnings.warn("Polars compiler not available as polars is not installed.")
+    msg = "Polars compiler not available as polars is not installed."
+    warnings.warn(msg)
diff --git a/bigframes/core/global_session.py b/bigframes/core/global_session.py
index e70cdad59e..8b32fee5b4 100644
--- a/bigframes/core/global_session.py
+++ b/bigframes/core/global_session.py
@@ -22,6 +22,7 @@
 import google.auth.exceptions
 
 import bigframes._config
+import bigframes.exceptions as bfe
 import bigframes.session
 
 _global_session: Optional[bigframes.session.Session] = None
@@ -38,11 +39,11 @@ def _try_close_session(session: bigframes.session.Session):
         session_id = session.session_id
         location = session._location
         project_id = session._project
-        warnings.warn(
+        msg = (
             f"Session cleanup failed for session with id: {session_id}, "
-            f"location: {location}, project: {project_id}",
-            category=bigframes.exceptions.CleanupFailedWarning,
+            f"location: {location}, project: {project_id}"
         )
+        warnings.warn(msg, category=bfe.CleanupFailedWarning)
         traceback.print_tb(e.__traceback__)
 
 
diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py
index 47ece70fb8..9c7fba8ec1 100644
--- a/bigframes/core/indexers.py
+++ b/bigframes/core/indexers.py
@@ -29,7 +29,7 @@
 import bigframes.core.scalar
 import bigframes.dataframe
 import bigframes.dtypes
-import bigframes.exceptions
+import bigframes.exceptions as bfe
 import bigframes.operations as ops
 import bigframes.series
 
@@ -407,11 +407,12 @@ def _struct_accessor_check_and_warn(
         return
 
     if not bigframes.dtypes.is_string_like(series.index.dtype):
-        warnings.warn(
-            "Are you trying to access struct fields? If so, please use Series.struct.field(...) method instead.",
-            category=bigframes.exceptions.BadIndexerKeyWarning,
-            stacklevel=7,  # Stack depth from series.__getitem__ to here
+        msg = (
+            "Are you trying to access struct fields? If so, please use Series.struct.field(...) "
+            "method instead."
         )
+        # Stack depth from series.__getitem__ to here
+        warnings.warn(msg, stacklevel=7, category=bfe.BadIndexerKeyWarning)
 
 
 @typing.overload
diff --git a/bigframes/core/utils.py b/bigframes/core/utils.py
index 3bafa380bf..f9ca6cb5f0 100644
--- a/bigframes/core/utils.py
+++ b/bigframes/core/utils.py
@@ -21,7 +21,7 @@
 import pandas as pd
 import typing_extensions
 
-import bigframes.exceptions as exc
+import bigframes.exceptions as bfe
 
 UNNAMED_COLUMN_ID = "bigframes_unnamed_column"
 UNNAMED_INDEX_ID = "bigframes_unnamed_index"
@@ -170,11 +170,6 @@ def merge_column_labels(
     return pd.Index(result_labels)
 
 
-def warn_preview(msg=""):
-    """Warn a preview API."""
-    warnings.warn(msg, exc.PreviewWarning)
-
-
 def preview(*, name: str):
     """Decorate to warn of a preview API."""
 
@@ -183,7 +178,7 @@ def decorator(func):
 
         @functools.wraps(func)
         def wrapper(*args, **kwargs):
-            warn_preview(msg=msg)
+            warnings.warn(msg, category=bfe.PreviewWarning)
             return func(*args, **kwargs)
 
         return wrapper
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index e7a0444af1..01e9bd6308 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -68,7 +68,7 @@
 import bigframes.core.window
 import bigframes.core.window_spec as windows
 import bigframes.dtypes
-import bigframes.exceptions
+import bigframes.exceptions as bfe
 import bigframes.formatting_helpers as formatter
 import bigframes.operations as ops
 import bigframes.operations.aggregations
@@ -1481,10 +1481,8 @@ def to_arrow(
         Returns:
             pyarrow.Table: A pyarrow Table with all rows and columns of this DataFrame.
         """
-        warnings.warn(
-            "to_arrow is in preview. Types and unnamed / duplicate name columns may change in future.",
-            category=bigframes.exceptions.PreviewWarning,
-        )
+        msg = "to_arrow is in preview. Types and unnamed / duplicate name columns may change in future."
+        warnings.warn(msg, category=bfe.PreviewWarning)
 
         pa_table, query_job = self._block.to_arrow(ordered=ordered)
         self._set_internal_query_job(query_job)
@@ -3920,10 +3918,8 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame:
 
     def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
         if utils.get_axis_number(axis) == 1:
-            warnings.warn(
-                "axis=1 scenario is in preview.",
-                category=bigframes.exceptions.PreviewWarning,
-            )
+            msg = "axis=1 scenario is in preview."
+            warnings.warn(msg, category=bfe.PreviewWarning)
 
             # Check if the function is a remote function
             if not hasattr(func, "bigframes_remote_function"):
diff --git a/bigframes/functions/_remote_function_session.py b/bigframes/functions/_remote_function_session.py
index 84bf2e1fc9..662c32a6a6 100644
--- a/bigframes/functions/_remote_function_session.py
+++ b/bigframes/functions/_remote_function_session.py
@@ -300,7 +300,7 @@ def remote_function(
                 https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings.
         """
         # Some defaults may be used from the session if not provided otherwise
-        import bigframes.exceptions as bf_exceptions
+        import bigframes.exceptions as bfe
         import bigframes.pandas as bpd
         import bigframes.series as bf_series
         import bigframes.session
@@ -445,11 +445,8 @@ def wrapper(func):
                 (input_type := input_types[0]) == bf_series.Series
                 or input_type == pandas.Series
             ):
-                warnings.warn(
-                    "input_types=Series is in preview.",
-                    stacklevel=1,
-                    category=bf_exceptions.PreviewWarning,
-                )
+                msg = "input_types=Series is in preview."
+                warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning)
 
                 # we will model the row as a json serialized string containing the data
                 # and the metadata representing the row
diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py
index b72b8ce8da..9b68843a7d 100644
--- a/bigframes/functions/remote_function.py
+++ b/bigframes/functions/remote_function.py
@@ -32,6 +32,7 @@
 
 import bigframes.core.compile.ibis_types
 import bigframes.dtypes
+import bigframes.exceptions as bfe
 import bigframes.functions.remote_function_template
 
 from . import _remote_function_session as rf_session
@@ -197,11 +198,11 @@ def func(*bigframes_args, **bigframes_kwargs):
             )
         function_input_dtypes.append(input_dtype)
     if has_unknown_dtypes:
-        warnings.warn(
-            "The function has one or more missing input data types."
-            f" BigQuery DataFrames will assume default data type {bigframes.dtypes.DEFAULT_DTYPE} for them.",
-            category=bigframes.exceptions.UnknownDataTypeWarning,
+        msg = (
+            "The function has one or more missing input data types. BigQuery DataFrames "
+            f"will assume default data type {bigframes.dtypes.DEFAULT_DTYPE} for them."
         )
+        warnings.warn(msg, category=bfe.UnknownDataTypeWarning)
     func.input_dtypes = tuple(function_input_dtypes)  # type: ignore
 
     func.output_dtype = bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype(  # type: ignore
diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py
index a2c122f8c7..f06de99181 100644
--- a/bigframes/ml/base.py
+++ b/bigframes/ml/base.py
@@ -241,9 +241,8 @@ def _predict_and_retry(
 
             if df_succ.empty:
                 if max_retries > 0:
-                    warnings.warn(
-                        "Can't make any progress, stop retrying.", RuntimeWarning
-                    )
+                    msg = "Can't make any progress, stop retrying."
+                    warnings.warn(msg, category=RuntimeWarning)
                 break
 
             df_result = (
@@ -254,10 +253,11 @@ def _predict_and_retry(
                 break
 
         if not df_fail.empty:
-            warnings.warn(
-                f"Some predictions failed. Check column {self._status_col} for detailed status. You may want to filter the failed rows and retry.",
-                RuntimeWarning,
+            msg = (
+                f"Some predictions failed. Check column {self._status_col} for detailed "
+                "status. You may want to filter the failed rows and retry."
             )
+            warnings.warn(msg, category=RuntimeWarning)
 
         df_result = cast(
             bpd.DataFrame,
diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py
index e6825f80bb..8d1df6e0b9 100644
--- a/bigframes/ml/llm.py
+++ b/bigframes/ml/llm.py
@@ -180,12 +180,11 @@ def _create_bqml_model(self):
             )
 
         if self.model_name not in _TEXT_GENERATOR_ENDPOINTS:
-            warnings.warn(
-                _MODEL_NOT_SUPPORTED_WARNING.format(
-                    model_name=self.model_name,
-                    known_models=", ".join(_TEXT_GENERATOR_ENDPOINTS),
-                )
+            msg = _MODEL_NOT_SUPPORTED_WARNING.format(
+                model_name=self.model_name,
+                known_models=", ".join(_TEXT_GENERATOR_ENDPOINTS),
             )
+            warnings.warn(msg)
 
         options = {
             "endpoint": self.model_name,
@@ -360,10 +359,11 @@ def predict(
         df = self._bqml_model.generate_text(X, options)
 
         if (df[_ML_GENERATE_TEXT_STATUS] != "").any():
-            warnings.warn(
-                f"Some predictions failed. Check column {_ML_GENERATE_TEXT_STATUS} for detailed status. You may want to filter the failed rows and retry.",
-                RuntimeWarning,
+            msg = (
+                f"Some predictions failed. Check column {_ML_GENERATE_TEXT_STATUS} for "
+                "detailed status. You may want to filter the failed rows and retry."
             )
+            warnings.warn(msg, category=RuntimeWarning)
 
         return df
 
@@ -513,12 +513,11 @@ def _create_bqml_model(self):
             )
 
         if self.model_name not in _PALM2_EMBEDDING_GENERATOR_ENDPOINTS:
-            warnings.warn(
-                _MODEL_NOT_SUPPORTED_WARNING.format(
-                    model_name=self.model_name,
-                    known_models=", ".join(_PALM2_EMBEDDING_GENERATOR_ENDPOINTS),
-                )
+            msg = _MODEL_NOT_SUPPORTED_WARNING.format(
+                model_name=self.model_name,
+                known_models=", ".join(_PALM2_EMBEDDING_GENERATOR_ENDPOINTS),
             )
+            warnings.warn(msg)
 
         endpoint = (
             self.model_name + "@" + self.version if self.version else self.model_name
@@ -590,10 +589,11 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame:
         )
 
         if (df[_ML_EMBED_TEXT_STATUS] != "").any():
-            warnings.warn(
-                f"Some predictions failed. Check column {_ML_EMBED_TEXT_STATUS} for detailed status. You may want to filter the failed rows and retry.",
-                RuntimeWarning,
+            msg = (
+                f"Some predictions failed. Check column {_ML_EMBED_TEXT_STATUS} for "
+                "detailed status. You may want to filter the failed rows and retry."
             )
+            warnings.warn(msg, category=RuntimeWarning)
 
         return df
 
@@ -678,12 +678,11 @@ def _create_bqml_model(self):
             )
 
         if self.model_name not in _TEXT_EMBEDDING_ENDPOINTS:
-            warnings.warn(
-                _MODEL_NOT_SUPPORTED_WARNING.format(
-                    model_name=self.model_name,
-                    known_models=", ".join(_TEXT_EMBEDDING_ENDPOINTS),
-                )
+            msg = _MODEL_NOT_SUPPORTED_WARNING.format(
+                model_name=self.model_name,
+                known_models=", ".join(_TEXT_EMBEDDING_ENDPOINTS),
             )
+            warnings.warn(msg)
 
         options = {
             "endpoint": self.model_name,
@@ -813,13 +812,15 @@ def __init__(
         max_iterations: int = 300,
     ):
         if model_name in _GEMINI_PREVIEW_ENDPOINTS:
-            warnings.warn(
-                f"""Model {model_name} is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the
-            Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is"
-            and might have limited support. For more information, see the launch stage descriptions
-            (https://cloud.google.com/products#product-launch-stages).""",
-                category=exceptions.PreviewWarning,
+            msg = (
+                f'Model {model_name} is subject to the "Pre-GA Offerings Terms" in '
+                "the General Service Terms section of the Service Specific Terms"
+                "(https://cloud.google.com/terms/service-terms#1). Pre-GA products and "
+                'features are available "as is" and might have limited support. For '
+                "more information, see the launch stage descriptions "
+                "(https://cloud.google.com/products#product-launch-stages)."
             )
+            warnings.warn(msg, category=exceptions.PreviewWarning)
         self.model_name = model_name
         self.session = session or bpd.get_global_session()
         self.max_iterations = max_iterations
@@ -856,12 +857,11 @@ def _create_bqml_model(self):
             )
 
         if self.model_name not in _GEMINI_ENDPOINTS:
-            warnings.warn(
-                _MODEL_NOT_SUPPORTED_WARNING.format(
-                    model_name=self.model_name,
-                    known_models=", ".join(_GEMINI_ENDPOINTS),
-                )
+            msg = _MODEL_NOT_SUPPORTED_WARNING.format(
+                model_name=self.model_name,
+                known_models=", ".join(_GEMINI_ENDPOINTS),
             )
+            warnings.warn(msg)
 
         options = {"endpoint": self.model_name}
 
@@ -1204,13 +1204,11 @@ def _create_bqml_model(self):
             )
 
         if self.model_name not in _CLAUDE_3_ENDPOINTS:
-            warnings.warn(
-                _MODEL_NOT_SUPPORTED_WARNING.format(
-                    model_name=self.model_name,
-                    known_models=", ".join(_CLAUDE_3_ENDPOINTS),
-                )
+            msg = _MODEL_NOT_SUPPORTED_WARNING.format(
+                model_name=self.model_name,
+                known_models=", ".join(_CLAUDE_3_ENDPOINTS),
             )
-
+            warnings.warn(msg)
         options = {
             "endpoint": self.model_name,
         }
diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py
index bb614b9da5..f4f55ad34e 100644
--- a/bigframes/ml/remote.py
+++ b/bigframes/ml/remote.py
@@ -139,9 +139,10 @@ def predict(
 
         # unlike LLM models, the general remote model status is null for successful runs.
         if (df[_REMOTE_MODEL_STATUS].notna()).any():
-            warnings.warn(
-                f"Some predictions failed. Check column {_REMOTE_MODEL_STATUS} for detailed status. You may want to filter the failed rows and retry.",
-                RuntimeWarning,
+            msg = (
+                f"Some predictions failed. Check column {_REMOTE_MODEL_STATUS} for "
+                "detailed status. You may want to filter the failed rows and retry."
             )
+            warnings.warn(msg, category=RuntimeWarning)
 
         return df
diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py
index b7c926be99..9c68a2c5ca 100644
--- a/bigframes/operations/_matplotlib/core.py
+++ b/bigframes/operations/_matplotlib/core.py
@@ -70,11 +70,10 @@ def _compute_sample_data(self, data):
         if self._sampling_warning_msg is not None:
             total_n = data.shape[0]
             if sampling_n < total_n:
-                warnings.warn(
-                    self._sampling_warning_msg.format(
-                        sampling_n=sampling_n, total_n=total_n
-                    )
+                msg = self._sampling_warning_msg.format(
+                    sampling_n=sampling_n, total_n=total_n
                 )
+                warnings.warn(msg)
 
         sampling_random_state = self.kwargs.pop(
             "sampling_random_state", DEFAULT_SAMPLING_STATE
diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py
index 6a537db4f3..a2bf18a41d 100644
--- a/bigframes/operations/semantics.py
+++ b/bigframes/operations/semantics.py
@@ -140,10 +140,11 @@ def agg(
         column = columns[0]
 
         if ground_with_google_search:
-            warnings.warn(
+            msg = (
                 "Enables Grounding with Google Search may impact billing cost. See pricing "
                 "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
             )
+            warnings.warn(msg)
 
         user_instruction = self._format_instruction(instruction, columns)
 
@@ -370,10 +371,11 @@ def filter(self, instruction: str, model, ground_with_google_search: bool = Fals
                 raise ValueError(f"Column {column} not found.")
 
         if ground_with_google_search:
-            warnings.warn(
+            msg = (
                 "Enables Grounding with Google Search may impact billing cost. See pricing "
                 "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
             )
+            warnings.warn(msg)
 
         self._confirm_operation(len(self._df))
 
@@ -468,10 +470,11 @@ def map(
                 raise ValueError(f"Column {column} not found.")
 
         if ground_with_google_search:
-            warnings.warn(
+            msg = (
                 "Enables Grounding with Google Search may impact billing cost. See pricing "
                 "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
             )
+            warnings.warn(msg)
 
         self._confirm_operation(len(self._df))
 
@@ -569,10 +572,11 @@ def join(
         columns = self._parse_columns(instruction)
 
         if ground_with_google_search:
-            warnings.warn(
+            msg = (
                 "Enables Grounding with Google Search may impact billing cost. See pricing "
                 "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
             )
+            warnings.warn(msg)
 
         work_estimate = len(self._df) * len(other)
         self._confirm_operation(work_estimate)
@@ -811,10 +815,11 @@ def top_k(
             )
 
         if ground_with_google_search:
-            warnings.warn(
+            msg = (
                 "Enables Grounding with Google Search may impact billing cost. See pricing "
                 "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
             )
+            warnings.warn(msg)
 
         work_estimate = int(len(self._df) * (len(self._df) - 1) / 2)
         self._confirm_operation(work_estimate)
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 51ca6d12b2..d787f8e7f3 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -66,6 +66,7 @@
 import bigframes.dataframe
 import bigframes.dtypes
 import bigframes.exceptions
+import bigframes.exceptions as bfe
 import bigframes.functions._remote_function_session as bigframes_rf_session
 import bigframes.functions.remote_function as bigframes_rf
 import bigframes.session._io.bigquery as bf_io_bigquery
@@ -150,25 +151,22 @@ def __init__(
 
         if context.location is None:
             self._location = "US"
-            warnings.warn(
-                f"No explicit location is set, so using location {self._location} for the session.",
-                # User's code
-                # -> get_global_session()
-                # -> connect()
-                # -> Session()
-                #
-                # Note: We could also have:
-                # User's code
-                # -> read_gbq()
-                # -> with_default_session()
-                # -> get_global_session()
-                # -> connect()
-                # -> Session()
-                # but we currently have no way to disambiguate these
-                # situations.
-                stacklevel=4,
-                category=bigframes.exceptions.DefaultLocationWarning,
-            )
+            msg = f"No explicit location is set, so using location {self._location} for the session."
+            # User's code
+            # -> get_global_session()
+            # -> connect()
+            # -> Session()
+            #
+            # Note: We could also have:
+            # User's code
+            # -> read_gbq()
+            # -> with_default_session()
+            # -> get_global_session()
+            # -> connect()
+            # -> Session()
+            # but we currently have no way to disambiguate these
+            # situations.
+            warnings.warn(msg, stacklevel=4, category=bfe.DefaultLocationWarning)
         else:
             self._location = context.location
 
@@ -236,10 +234,8 @@ def __init__(
         # Will expose as feature later, only False for internal testing
         self._strictly_ordered: bool = context.ordering_mode != "partial"
         if not self._strictly_ordered:
-            warnings.warn(
-                "Partial ordering mode is a preview feature and is subject to change.",
-                bigframes.exceptions.OrderingModePartialPreviewWarning,
-            )
+            msg = "Partial ordering mode is a preview feature and is subject to change."
+            warnings.warn(msg, bfe.OrderingModePartialPreviewWarning)
 
         self._allow_ambiguity = not self._strictly_ordered
         self._default_index_type = (
@@ -604,11 +600,8 @@ def read_gbq_table_streaming(
             bigframes.streaming.dataframe.StreamingDataFrame:
                A StreamingDataFrame representing results of the table.
         """
-        warnings.warn(
-            "The bigframes.streaming module is a preview feature, and subject to change.",
-            stacklevel=1,
-            category=bigframes.exceptions.PreviewWarning,
-        )
+        msg = "The bigframes.streaming module is a preview feature, and subject to change."
+        warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning)
 
         import bigframes.streaming.dataframe as streaming_dataframe
 
diff --git a/bigframes/session/_io/bigquery/read_gbq_table.py b/bigframes/session/_io/bigquery/read_gbq_table.py
index 4044b7bf43..6114427570 100644
--- a/bigframes/session/_io/bigquery/read_gbq_table.py
+++ b/bigframes/session/_io/bigquery/read_gbq_table.py
@@ -33,6 +33,7 @@
 import bigframes.core.compile.default_ordering
 import bigframes.core.sql
 import bigframes.dtypes
+import bigframes.exceptions as bfe
 import bigframes.session._io.bigquery
 import bigframes.session.clients
 import bigframes.version
@@ -59,21 +60,21 @@ def get_table_metadata(
         # Cache hit could be unexpected. See internal issue 329545805.
         # Raise a warning with more information about how to avoid the
         # problems with the cache.
-        warnings.warn(
+        msg = (
             f"Reading cached table from {snapshot_timestamp} to avoid "
             "incompatibilies with previous reads of this table. To read "
             "the latest version, set `use_cache=False` or close the "
             "current session with Session.close() or "
-            "bigframes.pandas.close_session().",
-            # There are many layers before we get to (possibly) the user's code:
-            # pandas.read_gbq_table
-            # -> with_default_session
-            # -> Session.read_gbq_table
-            # -> _read_gbq_table
-            # -> _get_snapshot_sql_and_primary_key
-            # -> get_snapshot_datetime_and_table_metadata
-            stacklevel=7,
+            "bigframes.pandas.close_session()."
         )
+        # There are many layers before we get to (possibly) the user's code:
+        # pandas.read_gbq_table
+        # -> with_default_session
+        # -> Session.read_gbq_table
+        # -> _read_gbq_table
+        # -> _get_snapshot_sql_and_primary_key
+        # -> get_snapshot_datetime_and_table_metadata
+        warnings.warn(msg, stacklevel=7)
         return cached_table
 
     table = bqclient.get_table(table_ref)
@@ -104,13 +105,13 @@ def validate_table(
     # Only true tables support time travel
     elif table.table_type != "TABLE":
         if table.table_type == "MATERIALIZED_VIEW":
-            warnings.warn(
+            msg = (
                 "Materialized views do not support FOR SYSTEM_TIME AS OF queries. "
                 "Attempting query without time travel. Be aware that as materialized views "
                 "are updated periodically, modifications to the underlying data in the view may "
-                "result in errors or unexpected behavior.",
-                category=bigframes.exceptions.TimeTravelDisabledWarning,
+                "result in errors or unexpected behavior."
             )
+            warnings.warn(msg, category=bfe.TimeTravelDisabledWarning)
     else:
         # table might support time travel, lets do a dry-run query with time travel
         snapshot_sql = bigframes.session._io.bigquery.to_query(
@@ -142,13 +143,13 @@ def validate_table(
         snapshot_sql, job_config=bigquery.QueryJobConfig(dry_run=True)
     )
     if time_travel_not_found:
-        warnings.warn(
+        msg = (
             "NotFound error when reading table with time travel."
             " Attempting query without time travel. Warning: Without"
             " time travel, modifications to the underlying table may"
-            " result in errors or unexpected behavior.",
-            category=bigframes.exceptions.TimeTravelDisabledWarning,
+            " result in errors or unexpected behavior."
         )
+        warnings.warn(msg, category=bfe.TimeTravelDisabledWarning)
     return False
 
 
@@ -263,15 +264,15 @@ def get_index_cols(
         # resource utilization because of the default sequential index. See
         # internal issue 335727141.
         if _is_table_clustered_or_partitioned(table) and not primary_keys:
-            warnings.warn(
+            msg = (
                 f"Table '{str(table.reference)}' is clustered and/or "
                 "partitioned, but BigQuery DataFrames was not able to find a "
                 "suitable index. To avoid this warning, set at least one of: "
                 # TODO(b/338037499): Allow max_results to override this too,
                 # once we make it more efficient.
-                "`index_col` or `filters`.",
-                category=bigframes.exceptions.DefaultIndexWarning,
+                "`index_col` or `filters`."
             )
+            warnings.warn(msg, category=bfe.DefaultIndexWarning)
 
         # If there are primary keys defined, the query engine assumes these
         # columns are unique, even if the constraint is not enforced. We make
@@ -296,21 +297,21 @@ def get_time_travel_datetime_and_table_metadata(
         # Cache hit could be unexpected. See internal issue 329545805.
         # Raise a warning with more information about how to avoid the
         # problems with the cache.
-        warnings.warn(
+        msg = (
             f"Reading cached table from {snapshot_timestamp} to avoid "
             "incompatibilies with previous reads of this table. To read "
             "the latest version, set `use_cache=False` or close the "
             "current session with Session.close() or "
-            "bigframes.pandas.close_session().",
-            # There are many layers before we get to (possibly) the user's code:
-            # pandas.read_gbq_table
-            # -> with_default_session
-            # -> Session.read_gbq_table
-            # -> _read_gbq_table
-            # -> _get_snapshot_sql_and_primary_key
-            # -> get_snapshot_datetime_and_table_metadata
-            stacklevel=7,
+            "bigframes.pandas.close_session()."
         )
+        # There are many layers before we get to (possibly) the user's code:
+        # pandas.read_gbq_table
+        # -> with_default_session
+        # -> Session.read_gbq_table
+        # -> _read_gbq_table
+        # -> _get_snapshot_sql_and_primary_key
+        # -> get_snapshot_datetime_and_table_metadata
+        warnings.warn(msg, stacklevel=7)
         return cached_table
 
     # TODO(swast): It's possible that the table metadata is changed between now
diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py
index 9ca1fa3117..553c3fd6e6 100644
--- a/bigframes/session/executor.py
+++ b/bigframes/session/executor.py
@@ -374,7 +374,8 @@ def peek(
         """
         plan = self.replace_cached_subtrees(array_value.node)
         if not tree_properties.can_fast_peek(plan):
-            warnings.warn("Peeking this value cannot be done efficiently.")
+            msg = "Peeking this value cannot be done efficiently."
+            warnings.warn(msg)
 
         sql = self.compiler.compile_peek(plan, n_rows)
 
diff --git a/bigframes/streaming/dataframe.py b/bigframes/streaming/dataframe.py
index b83ae5d822..960da9f57c 100644
--- a/bigframes/streaming/dataframe.py
+++ b/bigframes/streaming/dataframe.py
@@ -26,6 +26,7 @@
 import bigframes
 from bigframes import dataframe
 from bigframes.core import log_adapter
+import bigframes.exceptions as bfe
 
 
 def _return_type_wrapper(method, cls):
@@ -347,11 +348,8 @@ def _to_bigtable(
             For example, the job can be cancelled or its error status
             can be examined.
     """
-    warnings.warn(
-        "The bigframes.streaming module is a preview feature, and subject to change.",
-        stacklevel=1,
-        category=bigframes.exceptions.PreviewWarning,
-    )
+    msg = "The bigframes.streaming module is a preview feature, and subject to change."
+    warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning)
 
     # get default client if not passed
     if session is None:
@@ -462,11 +460,8 @@ def _to_pubsub(
             For example, the job can be cancelled or its error status
             can be examined.
     """
-    warnings.warn(
-        "The bigframes.streaming module is a preview feature, and subject to change.",
-        stacklevel=1,
-        category=bigframes.exceptions.PreviewWarning,
-    )
+    msg = "The bigframes.streaming module is a preview feature, and subject to change."
+    warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning)
 
     # get default client if not passed
     if session is None:
diff --git a/notebooks/generative_ai/large_language_models.ipynb b/notebooks/generative_ai/large_language_models.ipynb
index 744706cab8..bcb8f0f1a0 100644
--- a/notebooks/generative_ai/large_language_models.ipynb
+++ b/notebooks/generative_ai/large_language_models.ipynb
@@ -28,14 +28,26 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/garrettwu/src/bigframes/bigframes/ml/llm.py:589: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
-      "  self.session = session or bpd.get_global_session()\n"
+      "/usr/local/google/home/chelsealin/src/bigframes1/bigframes/pandas/__init__.py:259: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n",
+      "  return global_session.get_global_session()\n"
      ]
     },
     {
      "data": {
       "text/html": [
-       "Query job 675a6c8a-213b-496c-9f77-b87bf7cfa5e0 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:675a6c8a-213b-496c-9f77-b87bf7cfa5e0&page=queryresults\">Open Job</a>"
+       "Query job 92699550-36bc-4b51-9aec-fa79bc3a4927 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:92699550-36bc-4b51-9aec-fa79bc3a4927&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Query job 973b8369-8ba3-430b-b148-c577ed180024 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:973b8369-8ba3-430b-b148-c577ed180024&page=queryresults\">Open Job</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -88,7 +100,7 @@
     {
      "data": {
       "text/html": [
-       "Query job 7967df2b-9f0f-45c8-a363-15f65891c3bf is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:7967df2b-9f0f-45c8-a363-15f65891c3bf&page=queryresults\">Open Job</a>"
+       "Query job 1b9963ae-d091-467c-8c16-2d4ab8f5b94c is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:1b9963ae-d091-467c-8c16-2d4ab8f5b94c&page=queryresults\">Open Job</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -101,14 +113,14 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/__init__.py:108: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n",
-      "  warnings.warn(\n"
+      "/usr/local/google/home/chelsealin/src/bigframes1/bigframes/core/__init__.py:114: PreviewWarning: Interpreting JSON column(s) as pyarrow.large_string. This behavior may change in future versions.\n",
+      "  warnings.warn(msg, bfe.PreviewWarning)\n"
      ]
     },
     {
      "data": {
       "text/html": [
-       "Query job 9a1f57cd-98e1-4eac-a1b3-8f88d61971cd is DONE. 6 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:9a1f57cd-98e1-4eac-a1b3-8f88d61971cd&page=queryresults\">Open Job</a>"
+       "Query job 27de013f-af76-4730-b14d-dc3f2a7a9c3f is DONE. 6 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:27de013f-af76-4730-b14d-dc3f2a7a9c3f&page=queryresults\">Open Job</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -120,7 +132,19 @@
     {
      "data": {
       "text/html": [
-       "Query job 2a94a2cf-7d4c-4009-a798-d7a5d6d4049d is DONE. 8.5 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:2a94a2cf-7d4c-4009-a798-d7a5d6d4049d&page=queryresults\">Open Job</a>"
+       "Query job 0cf5695d-fdc5-4b73-b477-f69afd2e2fe1 is DONE. 6 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:0cf5695d-fdc5-4b73-b477-f69afd2e2fe1&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Query job fd37ef18-1d38-4bfa-b50d-c83a4a861a9b is DONE. 9.6 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:fd37ef18-1d38-4bfa-b50d-c83a4a861a9b&page=queryresults\">Open Job</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -159,28 +183,24 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>## BigQuery: A Serverless Data Warehouse\n",
-       "\n",
-       "BigQ...</td>\n",
-       "      <td>[{\"category\":1,\"probability\":1,\"probability_sc...</td>\n",
+       "      <td>## BigQuery: A serverless data warehouse for l...</td>\n",
+       "      <td>[{\"category\":\"HARM_CATEGORY_HATE_SPEECH\",\"prob...</td>\n",
        "      <td></td>\n",
        "      <td>What is BigQuery?</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>## BigQuery Machine Learning (BQML)\n",
+       "      <td>## BQML: BigQuery Machine Learning\n",
        "\n",
-       "BQML is a...</td>\n",
-       "      <td>[{\"category\":1,\"probability\":1,\"probability_sc...</td>\n",
+       "BQML (BigQ...</td>\n",
+       "      <td>[{\"category\":\"HARM_CATEGORY_HATE_SPEECH\",\"prob...</td>\n",
        "      <td></td>\n",
        "      <td>What is BQML?</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>## What is BigQuery DataFrame?\n",
-       "\n",
-       "**BigQuery Dat...</td>\n",
-       "      <td>[{\"category\":1,\"probability\":1,\"probability_sc...</td>\n",
+       "      <td>I'll do my best to provide a comprehensive and...</td>\n",
+       "      <td>[{\"category\":\"HARM_CATEGORY_HATE_SPEECH\",\"prob...</td>\n",
        "      <td></td>\n",
        "      <td>What is BigQuery DataFrame?</td>\n",
        "    </tr>\n",
@@ -190,20 +210,16 @@
       ],
       "text/plain": [
        "                         ml_generate_text_llm_result  \\\n",
-       "0  ## BigQuery: A Serverless Data Warehouse\n",
+       "0  ## BigQuery: A serverless data warehouse for l...   \n",
+       "1  ## BQML: BigQuery Machine Learning\n",
        "\n",
-       "BigQ...   \n",
-       "1  ## BigQuery Machine Learning (BQML)\n",
-       "\n",
-       "BQML is a...   \n",
-       "2  ## What is BigQuery DataFrame?\n",
-       "\n",
-       "**BigQuery Dat...   \n",
+       "BQML (BigQ...   \n",
+       "2  I'll do my best to provide a comprehensive and...   \n",
        "\n",
        "                         ml_generate_text_rai_result ml_generate_text_status  \\\n",
-       "0  [{\"category\":1,\"probability\":1,\"probability_sc...                           \n",
-       "1  [{\"category\":1,\"probability\":1,\"probability_sc...                           \n",
-       "2  [{\"category\":1,\"probability\":1,\"probability_sc...                           \n",
+       "0  [{\"category\":\"HARM_CATEGORY_HATE_SPEECH\",\"prob...                           \n",
+       "1  [{\"category\":\"HARM_CATEGORY_HATE_SPEECH\",\"prob...                           \n",
+       "2  [{\"category\":\"HARM_CATEGORY_HATE_SPEECH\",\"prob...                           \n",
        "\n",
        "                        prompt  \n",
        "0            What is BigQuery?  \n",
@@ -237,7 +253,7 @@
     {
      "data": {
       "text/plain": [
-       "\"## BigQuery: A Serverless Data Warehouse\\n\\nBigQuery is a serverless, cloud-based data warehouse that enables scalable analysis of large datasets. It's a popular choice for businesses of all sizes due to its ability to handle petabytes of data and run complex queries quickly and efficiently. Let's delve into its key features:\\n\\n**Serverless Architecture:** BigQuery eliminates the need for server management, allowing you to focus on analyzing data. Google manages the infrastructure, scaling resources up or down automatically based on your needs.\\n\\n**Scalability:** BigQuery can handle massive datasets, scaling seamlessly as your data volume grows. It automatically distributes queries across its infrastructure, ensuring fast and efficient processing.\\n\\n**SQL-like Querying:** BigQuery uses a familiar SQL-like syntax, making it easy for data analysts and developers to learn and use. This allows them to leverage their existing SQL knowledge for data exploration and analysis.\\n\\n**Cost-Effectiveness:** BigQuery offers a pay-as-you-go pricing model, meaning you only pay for the resources you use. 
This makes it a cost-effective solution for businesses with varying data processing needs.\\n\\n**Integration with Google Cloud:** BigQuery integrates seamlessly with other Google Cloud services like Cloud Storage, Dataflow, and Machine Learning, enabling a comprehensive data processing and analysis workflow within the Google Cloud ecosystem.\\n\\n**Security and Reliability:** BigQuery offers robust security features and high availability, ensuring data protection and reliable access.\\n\\n**Use Cases:** BigQuery finds applications in various scenarios, including:\\n\\n* **Data Warehousing:** Store and analyze large amounts of structured and semi-structured data.\\n* **Business Intelligence:** Generate insights from data for informed decision-making.\\n* **Data Analytics:** Perform complex data analysis and extract valuable patterns.\\n* **Machine Learning:** Train and deploy machine learning models on large datasets.\\n\\n**Getting Started:** To get started with BigQuery, you can create a free trial account on Google Cloud Platform and explore its features. Numerous tutorials and documentation are available to help you learn and use BigQuery effectively.\\n\\n## Additional Resources:\\n\\n* **BigQuery Documentation:** https://cloud.google.com/bigquery/docs/\\n* **BigQuery Quickstart:** https://cloud.google.com/bigquery/docs/quickstarts/quickstart-console\\n* **BigQuery Pricing:** https://cloud.google.com/bigquery/pricing\\n\\nFeel free to ask if you have any further questions about BigQuery!\""
+       "\"## BigQuery: A serverless data warehouse for large-scale data analysis\\n\\nBigQuery is a serverless, highly-scalable data warehouse designed for analyzing large datasets. It's a cloud-based service offered by Google Cloud Platform (GCP), allowing users to store, manage, and analyze massive amounts of data without managing infrastructure. \\n\\nHere are some key features of BigQuery:\\n\\n**Serverless:** You don't need to worry about provisioning, managing, or scaling servers. BigQuery handles all of this automatically, letting you focus on analyzing your data.\\n\\n**Highly-scalable:** BigQuery can handle datasets of any size, from gigabytes to petabytes. It can also scale up and down automatically to meet your processing needs.\\n\\n**Cost-effective:** You only pay for the resources you use, and there are no upfront costs. Additionally, BigQuery offers several pricing models to fit your needs, including on-demand, flat-rate, and flexible slots.\\n\\n**Easy to use:** BigQuery uses SQL, a standard query language, making it easy to analyze your data. No need to learn a new programming language.\\n\\n**Integrated with GCP:** BigQuery integrates seamlessly with other GCP services, such as Google Cloud Storage, Dataflow, and Kubernetes. 
This allows you to build powerful data pipelines and workflows.\\n\\n**Secure:** BigQuery uses industry-standard security practices to protect your data.\\n\\nHere are some use cases for BigQuery:\\n\\n* **Data warehousing and analytics:** Store and analyze large datasets for business intelligence and reporting.\\n* **Machine learning:** Train and deploy machine learning models on your data.\\n* **Data integration:** Combine data from multiple sources for analysis.\\n* **Real-time analytics:** Analyze data in real-time for insights and decision-making.\\n\\n**Here are some additional resources that you may find helpful:**\\n\\n* **BigQuery website:** https://cloud.google.com/bigquery\\n* **BigQuery documentation:** https://cloud.google.com/bigquery/docs\\n* **BigQuery tutorial:** https://cloud.google.com/bigquery/docs/tutorials\\n* **BigQuery pricing:** https://cloud.google.com/bigquery/pricing\\n\\nI hope this gives you a good overview of BigQuery. Please let me know if you have any other questions.\""
       ]
      },
      "execution_count": 5,
@@ -248,18 +264,11 @@
    "source": [
     "pred.iloc[0, 0]"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "venv",
    "language": "python",
    "name": "python3"
   },
@@ -273,7 +282,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.12.1"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/streaming/streaming_dataframe.ipynb b/notebooks/streaming/streaming_dataframe.ipynb
index 9b52c2d71e..b7da0cfd07 100644
--- a/notebooks/streaming/streaming_dataframe.ipynb
+++ b/notebooks/streaming/streaming_dataframe.ipynb
@@ -21,7 +21,7 @@
     {
      "data": {
       "text/plain": [
-       "'1.13.0'"
+       "'1.31.0'"
       ]
      },
      "execution_count": 1,
@@ -55,7 +55,19 @@
     {
      "data": {
       "text/html": [
-       "Query job 65df3a2f-cda8-405d-8b38-20a755f9b9a0 is DONE. 28.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-load-testing&j=bq:US:65df3a2f-cda8-405d-8b38-20a755f9b9a0&page=queryresults\">Open Job</a>"
+       "Query job c72abbec-0dda-49e8-8617-4d8178659ec2 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-load-testing&j=bq:US:c72abbec-0dda-49e8-8617-4d8178659ec2&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Query job d55762e7-d9d4-4a79-84a4-4975e9292158 is DONE. 28.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-load-testing&j=bq:US:d55762e7-d9d4-4a79-84a4-4975e9292158&page=queryresults\">Open Job</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -98,10 +110,10 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/garrettwu/src/bigframes/bigframes/session/__init__.py:773: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n",
-      "  warnings.warn(\n",
-      "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/blocks.py:126: NullIndexPreviewWarning: Creating object with Null Index. Null Index is a preview feature.\n",
-      "  warnings.warn(\n"
+      "/usr/local/google/home/chelsealin/src/bigframes1/bigframes/session/__init__.py:604: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n",
+      "  warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning)\n",
+      "/usr/local/google/home/chelsealin/src/bigframes1/bigframes/core/blocks.py:141: NullIndexPreviewWarning: Creating object with Null Index. Null Index is a preview feature.\n",
+      "  warnings.warn(msg, category=bfe.NullIndexPreviewWarning)\n"
      ]
     }
    ],
@@ -118,8 +130,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/blocks.py:126: NullIndexPreviewWarning: Creating object with Null Index. Null Index is a preview feature.\n",
-      "  warnings.warn(\n"
+      "/usr/local/google/home/chelsealin/src/bigframes1/bigframes/core/blocks.py:141: NullIndexPreviewWarning: Creating object with Null Index. Null Index is a preview feature.\n",
+      "  warnings.warn(msg, category=bfe.NullIndexPreviewWarning)\n"
      ]
     },
     {
@@ -132,7 +144,7 @@
     {
      "data": {
       "text/html": [
-       "Query job dd20bd9d-4844-43e4-86ab-95759d7e673a is DONE. 2.7 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-load-testing&j=bq:US:dd20bd9d-4844-43e4-86ab-95759d7e673a&page=queryresults\">Open Job</a>"
+       "Query job 2894a764-5336-492f-98e1-c865fb161ef9 is DONE. 28.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-load-testing&j=bq:US:2894a764-5336-492f-98e1-c865fb161ef9&page=queryresults\">Open Job</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -141,20 +153,10 @@
      "metadata": {},
      "output_type": "display_data"
     },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/blocks.py:126: NullIndexPreviewWarning: Creating object with Null Index. Null Index is a preview feature.\n",
-      "  warnings.warn(\n",
-      "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/blocks.py:126: NullIndexPreviewWarning: Creating object with Null Index. Null Index is a preview feature.\n",
-      "  warnings.warn(\n"
-     ]
-    },
     {
      "data": {
       "text/html": [
-       "Query job 873e44ee-76e9-4254-83d3-04cf36fbd140 is DONE. 28.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-load-testing&j=bq:US:873e44ee-76e9-4254-83d3-04cf36fbd140&page=queryresults\">Open Job</a>"
+       "Query job f8fb08cb-ba11-4d73-8fff-c36081d98206 is DONE. 10.4 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-load-testing&j=bq:US:f8fb08cb-ba11-4d73-8fff-c36081d98206&page=queryresults\">Open Job</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -408,8 +410,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/garrettwu/src/bigframes/bigframes/streaming/dataframe.py:341: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n",
-      "  warnings.warn(\n"
+      "/usr/local/google/home/chelsealin/src/bigframes1/bigframes/streaming/dataframe.py:352: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n",
+      "  warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning)\n"
      ]
     }
    ],
@@ -482,8 +484,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/garrettwu/src/bigframes/bigframes/core/blocks.py:126: NullIndexPreviewWarning: Creating object with Null Index. Null Index is a preview feature.\n",
-      "  warnings.warn(\n"
+      "/usr/local/google/home/chelsealin/src/bigframes1/bigframes/core/blocks.py:141: NullIndexPreviewWarning: Creating object with Null Index. Null Index is a preview feature.\n",
+      "  warnings.warn(msg, category=bfe.NullIndexPreviewWarning)\n"
      ]
     }
    ],
@@ -501,8 +503,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/garrettwu/src/bigframes/bigframes/streaming/dataframe.py:456: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n",
-      "  warnings.warn(\n"
+      "/usr/local/google/home/chelsealin/src/bigframes1/bigframes/streaming/dataframe.py:464: PreviewWarning: The bigframes.streaming module is a preview feature, and subject to change.\n",
+      "  warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning)\n"
      ]
     }
    ],
@@ -553,13 +555,6 @@
    "source": [
     "job.cancel()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -578,7 +573,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.12.1"
   }
  },
  "nbformat": 4,
diff --git a/noxfile.py b/noxfile.py
index 9610c1287e..967ced87ab 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -170,11 +170,11 @@ def install_unittest_dependencies(session, install_test_extra, *constraints):
     session.install(*standard_deps, *constraints)
 
     if UNIT_TEST_EXTERNAL_DEPENDENCIES:
-        warnings.warn(
+        msg = (
             "'unit_test_external_dependencies' is deprecated. Instead, please "
-            "use 'unit_test_dependencies' or 'unit_test_local_dependencies'.",
-            DeprecationWarning,
+            "use 'unit_test_dependencies' or 'unit_test_local_dependencies'."
         )
+        warnings.warn(msg, DeprecationWarning)
         session.install(*UNIT_TEST_EXTERNAL_DEPENDENCIES, *constraints)
 
     if UNIT_TEST_LOCAL_DEPENDENCIES:
diff --git a/tests/unit/_config/test_experiment_options.py b/tests/unit/_config/test_experiment_options.py
index e48479885d..8e612be06c 100644
--- a/tests/unit/_config/test_experiment_options.py
+++ b/tests/unit/_config/test_experiment_options.py
@@ -15,6 +15,7 @@
 import pytest
 
 import bigframes._config.experiment_options as experiment_options
+import bigframes.exceptions as bfe
 
 
 def test_semantic_operators_default_false():
@@ -26,7 +27,7 @@ def test_semantic_operators_default_false():
 def test_semantic_operators_set_true_shows_warning():
     options = experiment_options.ExperimentOptions()
 
-    with pytest.warns(UserWarning):
+    with pytest.warns(bfe.PreviewWarning):
         options.semantic_operators = True
 
     assert options.semantic_operators is True
@@ -41,7 +42,7 @@ def test_blob_default_false():
 def test_blob_set_true_shows_warning():
     options = experiment_options.ExperimentOptions()
 
-    with pytest.warns(UserWarning):
+    with pytest.warns(bfe.PreviewWarning):
         options.blob = True
 
     assert options.blob is True

From dc92e95b733e53d76b4eef67be720fb0aa59ae27 Mon Sep 17 00:00:00 2001
From: "release-please[bot]"
 <55107282+release-please[bot]@users.noreply.github.com>
Date: Tue, 14 Jan 2025 11:59:48 -0800
Subject: [PATCH 22/22] chore(main): release 1.32.0 (#1263)

Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
---
 CHANGELOG.md                              | 29 +++++++++++++++++++++++
 bigframes/version.py                      |  2 +-
 third_party/bigframes_vendored/version.py |  2 +-
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7826047761..b4bec86e9e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,35 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [1.32.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.31.0...v1.32.0) (2025-01-13)
+
+
+### Features
+
+* Add max_retries to TextEmbeddingGenerator and Claude3TextGenerator ([#1259](https://github.com/googleapis/python-bigquery-dataframes/issues/1259)) ([8077ff4](https://github.com/googleapis/python-bigquery-dataframes/commit/8077ff49426b103dc5a52eeb86a2c6a869c99825))
+* Bigframes.bigquery.parse_json ([#1265](https://github.com/googleapis/python-bigquery-dataframes/issues/1265)) ([27bbd80](https://github.com/googleapis/python-bigquery-dataframes/commit/27bbd8085ccac175f113afbd6c94b52c034a3d97))
+* Support DataFrame.astype(dict) ([#1262](https://github.com/googleapis/python-bigquery-dataframes/issues/1262)) ([5934f8e](https://github.com/googleapis/python-bigquery-dataframes/commit/5934f8ee0a1c950a820d1911d73a46f6891a40bb))
+
+
+### Bug Fixes
+
+* Avoid global mutation in `BigQueryOptions.client_endpoints_override` ([#1280](https://github.com/googleapis/python-bigquery-dataframes/issues/1280)) ([788f6e9](https://github.com/googleapis/python-bigquery-dataframes/commit/788f6e94a1e80f0ba8741a53a05a467e7b18e902))
+* Fix erroneous window bounds removal during compilation ([#1163](https://github.com/googleapis/python-bigquery-dataframes/issues/1163)) ([f91756a](https://github.com/googleapis/python-bigquery-dataframes/commit/f91756a4413b10f1072c0ae96301fe854bb1ba4e))
+
+
+### Dependencies
+
+* Relax sqlglot upper bound ([#1278](https://github.com/googleapis/python-bigquery-dataframes/issues/1278)) ([c71ec09](https://github.com/googleapis/python-bigquery-dataframes/commit/c71ec093314409cd4c7a52a713dbd6164fbbd792))
+
+
+### Documentation
+
+* Add bq studio links that allow users to generate Jupyter notebooks in bq studio with github contents ([#1266](https://github.com/googleapis/python-bigquery-dataframes/issues/1266)) ([58f13cb](https://github.com/googleapis/python-bigquery-dataframes/commit/58f13cb9ef8bac3222e5013d8ae77dd20f886e30))
+* Add snippet to evaluate ARIMA plus model in the Forecast a single time series with a univariate model tutorial ([#1267](https://github.com/googleapis/python-bigquery-dataframes/issues/1267)) ([3dcae2d](https://github.com/googleapis/python-bigquery-dataframes/commit/3dcae2dca45efdd4493cf3f367bf025ea291f4df))
+* Add snippet to see the ARIMA coefficients in the Forecast a single time series with a univariate model tutorial ([#1268](https://github.com/googleapis/python-bigquery-dataframes/issues/1268)) ([059a564](https://github.com/googleapis/python-bigquery-dataframes/commit/059a564095dfea0518982f13c8118d3807861ccf))
+* Update `bigframes.pandas.pandas` docstrings ([#1247](https://github.com/googleapis/python-bigquery-dataframes/issues/1247)) ([c4bffc3](https://github.com/googleapis/python-bigquery-dataframes/commit/c4bffc3e8ec630a362c94f9d269a66073a14ad04))
+* Use 002 model for better scalability in text generation ([#1270](https://github.com/googleapis/python-bigquery-dataframes/issues/1270)) ([bb7a850](https://github.com/googleapis/python-bigquery-dataframes/commit/bb7a85005ebebfbcb0d2a4d5c4c27b354f38d3d1))
+
 ## [1.31.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.30.0...v1.31.0) (2025-01-05)
 
 
diff --git a/bigframes/version.py b/bigframes/version.py
index 7b6d1f2153..0858c02c1e 100644
--- a/bigframes/version.py
+++ b/bigframes/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "1.31.0"
+__version__ = "1.32.0"
diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py
index 7b6d1f2153..0858c02c1e 100644
--- a/third_party/bigframes_vendored/version.py
+++ b/third_party/bigframes_vendored/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "1.31.0"
+__version__ = "1.32.0"