diff --git a/bigframes/_config/compute_options.py b/bigframes/_config/compute_options.py
index c8a54fe0b3..21b41eb185 100644
--- a/bigframes/_config/compute_options.py
+++ b/bigframes/_config/compute_options.py
@@ -66,6 +66,14 @@ class ComputeOptions:
             engine to handle. However this comes at the cost of increase cost and latency.
         extra_query_labels (Dict[str, Any], Options):
             Stores additional custom labels for query configuration.
+        semantic_ops_confirmation_threshold (int, optional):
+            Guards against unexpected processing of a large number of rows by semantic operators.
+            If the number of rows exceeds the threshold, the user will be asked to confirm
+            before the operation proceeds. The default value is 0. Set the value to None
+            to turn off the guard.
+        semantic_ops_threshold_autofail (bool):
+            Guards against unexpected processing of a large number of rows by semantic operators.
+            When set to True, the operation automatically fails without asking for user input.
     """

     maximum_bytes_billed: Optional[int] = None
@@ -73,6 +81,8 @@ class ComputeOptions:
     extra_query_labels: Dict[str, Any] = dataclasses.field(
         default_factory=dict, init=False
     )
+    semantic_ops_confirmation_threshold: Optional[int] = 0
+    semantic_ops_threshold_autofail: bool = False

     def assign_extra_query_labels(self, **kwargs: Any) -> None:
         """
diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py
index 27f3508ff4..3cb5f3665d 100644
--- a/bigframes/exceptions.py
+++ b/bigframes/exceptions.py
@@ -59,6 +59,10 @@ class QueryComplexityError(RuntimeError):
     """Query plan is too complex to execute."""


+class OperationAbortedError(RuntimeError):
+    """Operation is aborted."""
+
+
 class TimeTravelDisabledWarning(Warning):
     """A query was reattempted without time travel."""

diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py
index 79b92afe4f..6a537db4f3 100644
--- a/bigframes/operations/semantics.py
+++ b/bigframes/operations/semantics.py
@@ -20,8 +20,8 @@

 import numpy as np

-import bigframes.core.guid as guid
-import bigframes.dtypes as dtypes
+from bigframes import dtypes, exceptions
+from bigframes.core import guid


 class Semantics:
@@ -53,6 +53,7 @@ def agg(
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
             >>> bpd.options.experiments.semantic_operators = True
+            >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

             >>> import bigframes.ml.llm as llm
             >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001")
@@ -115,6 +116,15 @@ def agg(
         self._validate_model(model)
         columns = self._parse_columns(instruction)

+        if max_agg_rows <= 1:
+            raise ValueError(
+                f"Invalid value for `max_agg_rows`: {max_agg_rows}."
+                " It must be greater than 1."
+            )
+
+        work_estimate = len(self._df) * int(max_agg_rows / (max_agg_rows - 1))
+        self._confirm_operation(work_estimate)
+
         df: bigframes.dataframe.DataFrame = self._df.copy()
         for column in columns:
             if column not in self._df.columns:
@@ -135,12 +145,6 @@
                 "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
             )

-        if max_agg_rows <= 1:
-            raise ValueError(
-                f"Invalid value for `max_agg_rows`: {max_agg_rows}."
-                "It must be greater than 1."
-            )
-
         user_instruction = self._format_instruction(instruction, columns)

         num_cluster = 1
@@ -243,6 +247,7 @@ def cluster_by(
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
             >>> bpd.options.experiments.semantic_operators = True
+            >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

             >>> import bigframes.ml.llm as llm
             >>> model = llm.TextEmbeddingGenerator()
@@ -296,6 +301,8 @@
                 "It must be greater than 1."
             )

+        self._confirm_operation(len(self._df))
+
         df: bigframes.dataframe.DataFrame = self._df.copy()
         embeddings_df = model.predict(df[column])

@@ -314,6 +321,7 @@ def filter(self, instruction: str, model, ground_with_google_search: bool = Fals
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
             >>> bpd.options.experiments.semantic_operators = True
+            >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

             >>> import bigframes.ml.llm as llm
             >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001")
@@ -367,6 +375,8 @@ def filter(self, instruction: str, model, ground_with_google_search: bool = Fals
                 "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
             )

+        self._confirm_operation(len(self._df))
+
         df: bigframes.dataframe.DataFrame = self._df[columns].copy()
         for column in columns:
             if df[column].dtype != dtypes.STRING_DTYPE:
@@ -403,6 +413,7 @@ def map(
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
             >>> bpd.options.experiments.semantic_operators = True
+            >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

             >>> import bigframes.ml.llm as llm
             >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001")
@@ -462,6 +473,8 @@ def map(
                 "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
             )

+        self._confirm_operation(len(self._df))
+
         df: bigframes.dataframe.DataFrame = self._df[columns].copy()
         for column in columns:
             if df[column].dtype != dtypes.STRING_DTYPE:
@@ -490,7 +503,6 @@ def join(
         other,
         instruction: str,
         model,
-        max_rows: int = 1000,
         ground_with_google_search: bool = False,
     ):
         """
@@ -502,6 +514,7 @@
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
             >>> bpd.options.experiments.semantic_operators = True
+            >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

             >>> import bigframes.ml.llm as llm
             >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001")
@@ -561,12 +574,8 @@
                 "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
             )

-        joined_table_rows = len(self._df) * len(other)
-
-        if joined_table_rows > max_rows:
-            raise ValueError(
-                f"Number of rows that need processing is {joined_table_rows}, which exceeds row limit {max_rows}."
-            )
+        work_estimate = len(self._df) * len(other)
+        self._confirm_operation(work_estimate)

         left_columns = []
         right_columns = []
@@ -645,6 +654,7 @@ def search(
             >>> import bigframes
             >>> bigframes.options.experiments.semantic_operators = True
+            >>> bigframes.options.compute.semantic_ops_confirmation_threshold = 25

             >>> import bigframes.ml.llm as llm
             >>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-005")
@@ -680,6 +690,8 @@
         if search_column not in self._df.columns:
             raise ValueError(f"Column `{search_column}` not found")

+        self._confirm_operation(len(self._df))
+
         import bigframes.ml.llm as llm

         if not isinstance(model, llm.TextEmbeddingGenerator):
@@ -743,6 +755,7 @@ def top_k(
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
             >>> bpd.options.experiments.semantic_operators = True
+            >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

             >>> import bigframes.ml.llm as llm
             >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001")
@@ -803,6 +816,9 @@
                 "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models"
             )

+        work_estimate = int(len(self._df) * (len(self._df) - 1) / 2)
+        self._confirm_operation(work_estimate)
+
         df: bigframes.dataframe.DataFrame = self._df[columns].copy()
         column = columns[0]
         if df[column].dtype != dtypes.STRING_DTYPE:
@@ -940,9 +956,8 @@ def sim_join(
             >>> import bigframes.pandas as bpd
             >>> bpd.options.display.progress_bar = None
-
-            >>> import bigframes
-            >>> bigframes.options.experiments.semantic_operators = True
+            >>> bpd.options.experiments.semantic_operators = True
+            >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25

             >>> import bigframes.ml.llm as llm
             >>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-005")
@@ -1001,6 +1016,9 @@
         if top_k < 1:
             raise ValueError("top_k must be an integer greater than or equal to 1.")

+        work_estimate = len(self._df) * len(other)
+        self._confirm_operation(work_estimate)
+
         base_table_embedding_column = guid.generate_guid()
         base_table = self._attach_embedding(
             other, right_on, base_table_embedding_column, model
@@ -1072,3 +1090,29 @@

         if not isinstance(model, GeminiTextGenerator):
             raise TypeError("Model is not GeminiText Generator")
+
+    @staticmethod
+    def _confirm_operation(row_count: int):
+        """Raises OperationAbortedError when the confirmation fails."""
+        import bigframes
+
+        threshold = bigframes.options.compute.semantic_ops_confirmation_threshold
+
+        if threshold is None or row_count <= threshold:
+            return
+
+        if bigframes.options.compute.semantic_ops_threshold_autofail:
+            raise exceptions.OperationAbortedError(
+                f"Operation was cancelled because the work estimate of {row_count} rows exceeds the threshold of {threshold} rows."
+            )
+
+        # Print the prompt separately: in IDEs such as VS Code, a prompt passed
+        # directly to the input function is less visible to the end user.
+        print(f"This operation will process about {row_count} rows.")
+        print(
+            "You can raise the confirmation threshold by setting `bigframes.options.compute.semantic_ops_confirmation_threshold` to a higher value. To completely turn off the confirmation check, set the threshold to `None`."
+        )
+        print("Proceed? [Y/n]")
+        reply = input().casefold()
+        if reply not in {"y", "yes", ""}:
+            raise exceptions.OperationAbortedError("Operation was cancelled.")
diff --git a/notebooks/experimental/semantic_operators.ipynb b/notebooks/experimental/semantic_operators.ipynb
index 374236e152..8a2f083419 100644
--- a/notebooks/experimental/semantic_operators.ipynb
+++ b/notebooks/experimental/semantic_operators.ipynb
@@ -153,7 +153,43 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# API Syntax"
+    "*Note*: Semantic operators can be expensive over large sets of data. As a result, our team added the option `bigframes.options.compute.semantic_ops_confirmation_threshold` in version 1.31.0 so that BigQuery DataFrames will ask for your confirmation when the amount of data to be processed is too large. If the number of rows exceeds your threshold, you will see a prompt for your keyboard input -- 'y' to proceed and 'n' to abort. If you abort the operation, no LLM processing will be done.\n",
+    "\n",
+    "The default threshold is 0, which means the operators will always ask for confirmation. You are free to adjust the value as needed. You can also set the threshold to `None` to disable this feature."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n",
+    "    bigframes.options.compute.semantic_ops_confirmation_threshold = 100"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you would like your operations to fail automatically when the data is too large, set `bigframes.options.compute.semantic_ops_threshold_autofail` to `True`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# if Version(bigframes.__version__) >= Version(\"1.31.0\"):\n",
+    "#     bigframes.options.compute.semantic_ops_threshold_autofail = True"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# The API"
    ]
   },
   {
@@ -181,7 +217,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -239,7 +275,7 @@
        "[3 rows x 2 columns]"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -263,7 +299,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -317,7 +353,7 @@
        "[1 rows x 2 columns]"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -351,7 +387,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -409,7 +445,7 @@
        "[3 rows x 2 columns]"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -431,7 +467,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -504,7 +540,7 @@
        "[3 rows x 3 columns]"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -531,7 +567,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -548,7 +584,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -620,7 +656,7 @@
        "[4 rows x 2 columns]"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -633,7 +669,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "!! **Important:** Semantic join can trigger probihitively expensitve operations! This operation first cross joins two dataframes, then invokes semantic filter on each row. That means if you have two dataframes of sizes `M` and `N`, the total amount of queries sent to the LLM is on the scale of `M * N`. Therefore, our team has added a parameter `max_rows`, a threshold that guards against unexpected expensive calls. With this parameter, the operator first calculates the size of your cross-joined data, and compares it with the threshold. If the size exceeds your threshold, the fuction will abort early with a `ValueError`. You can manually set the value of `max_rows` to raise or lower the threshold."
+    "!! **Important:** Semantic join can trigger prohibitively expensive operations! This operation first cross joins two dataframes, then invokes semantic filter on each row. That means if you have two dataframes of sizes `M` and `N`, the total number of queries sent to the LLM is on the scale of `M * N`. "
    ]
   },
   {
@@ -654,7 +690,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -670,7 +706,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -754,7 +790,7 @@
        "[6 rows x 2 columns]"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -781,7 +817,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -855,7 +891,7 @@
        "[7 rows x 1 columns]"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -884,7 +920,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -903,7 +939,7 @@
        "Name: Movies, dtype: string"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -936,7 +972,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -952,7 +988,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -1009,7 +1045,7 @@
        "[2 rows x 1 columns]"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1041,7 +1077,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -1105,7 +1141,7 @@
        "[5 rows x 1 columns]"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1124,7 +1160,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -1188,7 +1224,7 @@
        "[2 rows x 2 columns]"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1222,7 +1258,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1239,7 +1275,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -1325,7 +1361,7 @@
        "[5 rows x 3 columns]"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "!! **Important** Like semantic join, this operator can also be very expensive. To guard against unexpected processing of large dataset, use the `max_rows` parameter to specify a threshold. "
+    "!! **Important** Like semantic join, this operator can also be very expensive. To guard against unexpected processing of large datasets, use the `bigframes.options.compute.semantic_ops_confirmation_threshold` option to specify a threshold. "
    ]
   },
   {
@@ -1357,7 +1393,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1373,7 +1409,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -1415,17 +1451,17 @@
       "
I'm... banashark \n", - "321 Default search scope is an option in the Finde... kitsunesoba \n", - "390 Orthogonality and biology aren't friends. agumonkey \n", - "395 I chose some random physics book that was good... prawn \n", - "423 Seeing this get huge on Twitter. It's the... shenanigoat \n", - "427 Looking through the comments there are a numbe... moomin \n", - "428 Legacy media is a tough business. GBTC is payi... arcticbull \n", - "435 Same thing if you sell unsafe food, yet we hav... jabradoodle \n", - "437 There was briefly a thing called HSCSD ("... LeoPanthera \n", - "445 > This article is a bit comical to read and... lapcat \n", - "452 Large positions are most likely sold off in sm... meowkit \n", - "506 A US-based VPN (or really any VPN) is only goi... RandomBacon \n", - "542 rationale for the tabo... mechanical_fish \n", + "312 Do you have any reference for this?
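
For readers of this change, here is a minimal usage sketch of the confirmation guard wired up above. It is not part of the diff: the `df.semantics.filter` accessor call and the sample data are assumptions for illustration, while the option names, the exception, and the autofail behavior come from `bigframes/_config/compute_options.py` and `bigframes/operations/semantics.py` in this change.

```python
# Minimal sketch, assuming a configured BigQuery DataFrames session and that
# semantic operators are reached through the (assumed) `df.semantics` accessor.
import bigframes.pandas as bpd
import bigframes.ml.llm as llm
from bigframes.exceptions import OperationAbortedError

bpd.options.experiments.semantic_operators = True

# Guard any semantic operator whose work estimate exceeds one row...
bpd.options.compute.semantic_ops_confirmation_threshold = 1
# ...and fail immediately rather than blocking on input(), which suits
# non-interactive jobs where a hidden prompt would hang forever.
bpd.options.compute.semantic_ops_threshold_autofail = True

df = bpd.DataFrame({"city": ["Seattle", "Berlin"]})  # work estimate: 2 rows
model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001")

try:
    # filter() calls _confirm_operation(len(df)); 2 rows exceed the threshold
    # of 1, so with autofail set this raises instead of prompting.
    df.semantics.filter("The {city} is in Europe", model)
except OperationAbortedError as exc:
    print(exc)
```

Interactive users would instead leave `semantic_ops_threshold_autofail` unset and answer the `Proceed? [Y/n]` prompt; an empty reply counts as yes.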