From 912b8742963cf6f188cb15b17afbd87131704be3 Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Fri, 16 Feb 2024 01:49:22 +0000
Subject: [PATCH 01/10] perf: inline read_pandas for small data

---
 bigframes/dataframe.py        | 16 +---------------
 bigframes/operations/base.py  | 12 +-----------
 bigframes/session/__init__.py | 30 +++++++++++++++++++++++++++---
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 9db567a497..8f84979b21 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -69,10 +69,6 @@
     import bigframes.session
 
 
-# BigQuery has 1 MB query size limit, 5000 items shouldn't take more than 10% of this depending on data type.
-# TODO(tbergeron): Convert to bytes-based limit
-MAX_INLINE_DF_SIZE = 5000
-
 LevelType = typing.Hashable
 LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]]
 SingleItemValue = Union[bigframes.series.Series, int, float, Callable]
@@ -170,17 +166,7 @@ def __init__(
                 columns=columns,  # type:ignore
                 dtype=dtype,  # type:ignore
             )
-            if (
-                pd_dataframe.size < MAX_INLINE_DF_SIZE
-                # TODO(swast): Workaround data types limitation in inline data.
-                and not any(
-                    dt.pyarrow_dtype
-                    for dt in pd_dataframe.dtypes
-                    if isinstance(dt, pandas.ArrowDtype)
-                )
-            ):
-                self._block = blocks.Block.from_local(pd_dataframe)
-            elif session:
+            if session:
                 self._block = session.read_pandas(pd_dataframe)._get_block()
             else:
                 self._block = bigframes.pandas.read_pandas(pd_dataframe)._get_block()
diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py
index 04114b43cb..96134bc4b4 100644
--- a/bigframes/operations/base.py
+++ b/bigframes/operations/base.py
@@ -104,17 +104,7 @@ def __init__(
             if pd_series.name is None:
                 # to_frame will set default numeric column label if unnamed, but we do not support int column label, so must rename
                 pd_dataframe = pd_dataframe.set_axis(["unnamed_col"], axis=1)
-            if (
-                pd_dataframe.size < MAX_INLINE_SERIES_SIZE
-                # TODO(swast): Workaround data types limitation in inline data.
-                and not any(
-                    dt.pyarrow_dtype
-                    for dt in pd_dataframe.dtypes
-                    if isinstance(dt, pd.ArrowDtype)
-                )
-            ):
-                block = blocks.Block.from_local(pd_dataframe)
-            elif session:
+            if session:
                 block = session.read_pandas(pd_dataframe)._get_block()
             else:
                 # Uses default global session
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index df0cd6e947..b8aad20f5b 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -108,6 +108,10 @@
     "UTF-32LE",
 }
 
+# BigQuery has a 1 MB query size limit; 5000 items shouldn't take more than 10% of this, depending on data type.
+# TODO(tbergeron): Convert to bytes-based limit
+MAX_INLINE_DF_SIZE = 5000
+
 logger = logging.getLogger(__name__)
 
 
@@ -882,6 +886,26 @@ def read_pandas(self, pandas_dataframe: pandas.DataFrame) -> dataframe.DataFrame
 
     def _read_pandas(
         self, pandas_dataframe: pandas.DataFrame, api_name: str
+    ) -> dataframe.DataFrame:
+        if (
+            pandas_dataframe.size < MAX_INLINE_DF_SIZE
+            # TODO(swast): Workaround data types limitation in inline data.
+            and not any(
+                dt.pyarrow_dtype
+                for dt in pandas_dataframe.dtypes
+                if isinstance(dt, pandas.ArrowDtype)
+            )
+        ):
+            return self._read_pandas_inline(pandas_dataframe)
+        return self._read_pandas_load_job(pandas_dataframe, api_name)
+
+    def _read_pandas_inline(
+        self, pandas_dataframe: pandas.DataFrame
+    ) -> dataframe.DataFrame:
+        return dataframe.DataFrame(blocks.Block.from_local(pandas_dataframe))
+
+    def _read_pandas_load_job(
+        self, pandas_dataframe: pandas.DataFrame, api_name: str
     ) -> dataframe.DataFrame:
         col_labels, idx_labels = (
             pandas_dataframe.columns.to_list(),
@@ -1079,7 +1103,7 @@ def read_csv(
                 encoding=encoding,
                 **kwargs,
             )
-            return self.read_pandas(pandas_df)  # type: ignore
+            return self._read_pandas(pandas_df, "read_csv")  # type: ignore
 
     def read_pickle(
         self,
@@ -1096,7 +1120,7 @@ def read_pickle(
         if isinstance(pandas_obj, pandas.Series):
             if pandas_obj.name is None:
                 pandas_obj.name = "0"
-            bigframes_df = self.read_pandas(pandas_obj.to_frame())
+            bigframes_df = self._read_pandas(pandas_obj.to_frame(), "read_pickle")
             return bigframes_df[bigframes_df.columns[0]]
         return self._read_pandas(pandas_obj, "read_pickle")
 
@@ -1196,7 +1220,7 @@ def read_json(
                     engine=engine,
                     **kwargs,
                 )
-            return self.read_pandas(pandas_df)
+            return self._read_pandas(pandas_df, "read_json")
 
     def _check_file_size(self, filepath: str):
         max_size = 1024 * 1024 * 1024  # 1 GB in bytes

From 22dc85c047160beca727258084434bae269a5b64 Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Fri, 16 Feb 2024 18:48:49 +0000
Subject: [PATCH 02/10] fix tests

---
 .../getting_started_bq_dataframes.ipynb       | 214 +++++++++++++++++-
 tests/unit/session/test_io_bigquery.py        |   3 +-
 2 files changed, 214 insertions(+), 3 deletions(-)

diff --git a/notebooks/getting_started/getting_started_bq_dataframes.ipynb b/notebooks/getting_started/getting_started_bq_dataframes.ipynb
index a9b6aefe30..403e871135 100644
--- a/notebooks/getting_started/getting_started_bq_dataframes.ipynb
+++ b/notebooks/getting_started/getting_started_bq_dataframes.ipynb
@@ -346,7 +346,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 1,
       "metadata": {
         "id": "PyQmSRbKA8r-"
       },
@@ -365,6 +365,15 @@
         "### Set BigQuery DataFrames options"
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import pandas as pd"
+      ]
+    },
     {
       "cell_type": "code",
       "execution_count": null,
@@ -377,6 +386,81 @@
         "bf.options.bigquery.location = REGION"
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": 8,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "Query job 4dc6c41c-3e96-47db-98e4-ae6c2f6bf50e is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:4dc6c41c-3e96-47db-98e4-ae6c2f6bf50e&page=queryresults\">Open Job</a>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>1</th>\n",
+              "      <th>2</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>1</td>\n",
+              "      <td>3</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>2</td>\n",
+              "      <td>4</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>2 rows × 2 columns</p>\n",
+              "</div>[2 rows x 2 columns in total]"
+            ],
+            "text/plain": [
+              "   1  2\n",
+              "0  1  3\n",
+              "1  2  4\n",
+              "\n",
+              "[2 rows x 2 columns]"
+            ]
+          },
+          "execution_count": 8,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "bf.DataFrame({\"1\": [1, 2], \"2\": [3, 4]})"
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -386,6 +470,122 @@
         "If you want to reset the location of the created DataFrame or Series objects, reset the session by executing `bf.close_session()`. After that, you can reuse `bf.options.bigquery.location` to specify another location."
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "pd_df = pd.DataFrame({\"1\": [1, 2], \"2\": [3, 4]})"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "Load job c9eedc60-a8ca-4b5c-8043-fd38525b5126 is DONE. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:c9eedc60-a8ca-4b5c-8043-fd38525b5126&page=queryresults\">Open Job</a>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/usr/local/google/home/garrettwu/.pyenv/versions/3.10.9/lib/python3.10/site-packages/ibis/backends/bigquery/__init__.py:499: FutureWarning: `database` is deprecated as of v7.1, removed in v8.0; The bigquery backend cannot return a table expression using only a `database` specifier. Include a `schema` argument.\n",
+            "  util.warn_deprecated(\n"
+          ]
+        },
+        {
+          "data": {
+            "text/html": [
+              "Query job e9c219ee-880f-464a-9464-cb2dab20d498 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:e9c219ee-880f-464a-9464-cb2dab20d498&page=queryresults\">Open Job</a>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "Query job d852b9ab-3814-4fd0-91af-53c57152c027 is DONE. 64 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:d852b9ab-3814-4fd0-91af-53c57152c027&page=queryresults\">Open Job</a>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>1</th>\n",
+              "      <th>2</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>1</td>\n",
+              "      <td>3</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>2</td>\n",
+              "      <td>4</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>2 rows × 2 columns</p>\n",
+              "</div>[2 rows x 2 columns in total]"
+            ],
+            "text/plain": [
+              "   1  2\n",
+              "0  1  3\n",
+              "1  2  4\n",
+              "\n",
+              "[2 rows x 2 columns]"
+            ]
+          },
+          "execution_count": 9,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "bf.read_pandas(pd_df)"
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -973,6 +1173,18 @@
     "kernelspec": {
       "display_name": "Python 3",
       "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.10.9"
     }
   },
   "nbformat": 4,
diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py
index 96bb7bf67f..d610574efc 100644
--- a/tests/unit/session/test_io_bigquery.py
+++ b/tests/unit/session/test_io_bigquery.py
@@ -81,9 +81,8 @@ def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit():
         "recent-bigframes-api-2": "dataframe-__init__",
         "recent-bigframes-api-3": "dataframe-head",
         "recent-bigframes-api-4": "dataframe-__init__",
+        "recent-bigframes-api-5": "dataframe-__init__",
     }
-    assert labels is not None
-    assert len(labels) == 7
     assert labels == expected_dict
 
 

From e113de7a0cc7717b0426d39c54d02e20f763d346 Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Fri, 16 Feb 2024 18:53:45 +0000
Subject: [PATCH 03/10] revert unrelated file

---
 .../getting_started_bq_dataframes.ipynb       | 214 +-----------------
 1 file changed, 1 insertion(+), 213 deletions(-)

diff --git a/notebooks/getting_started/getting_started_bq_dataframes.ipynb b/notebooks/getting_started/getting_started_bq_dataframes.ipynb
index 403e871135..a9b6aefe30 100644
--- a/notebooks/getting_started/getting_started_bq_dataframes.ipynb
+++ b/notebooks/getting_started/getting_started_bq_dataframes.ipynb
@@ -346,7 +346,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 1,
+      "execution_count": null,
       "metadata": {
         "id": "PyQmSRbKA8r-"
       },
@@ -365,15 +365,6 @@
         "### Set BigQuery DataFrames options"
       ]
     },
-    {
-      "cell_type": "code",
-      "execution_count": 2,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import pandas as pd"
-      ]
-    },
     {
       "cell_type": "code",
       "execution_count": null,
@@ -386,81 +377,6 @@
         "bf.options.bigquery.location = REGION"
       ]
     },
-    {
-      "cell_type": "code",
-      "execution_count": 8,
-      "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/html": [
-              "Query job 4dc6c41c-3e96-47db-98e4-ae6c2f6bf50e is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:4dc6c41c-3e96-47db-98e4-ae6c2f6bf50e&page=queryresults\">Open Job</a>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>1</th>\n",
-              "      <th>2</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>1</td>\n",
-              "      <td>3</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>2</td>\n",
-              "      <td>4</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "<p>2 rows × 2 columns</p>\n",
-              "</div>[2 rows x 2 columns in total]"
-            ],
-            "text/plain": [
-              "   1  2\n",
-              "0  1  3\n",
-              "1  2  4\n",
-              "\n",
-              "[2 rows x 2 columns]"
-            ]
-          },
-          "execution_count": 8,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "bf.DataFrame({\"1\": [1, 2], \"2\": [3, 4]})"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -470,122 +386,6 @@
         "If you want to reset the location of the created DataFrame or Series objects, reset the session by executing `bf.close_session()`. After that, you can reuse `bf.options.bigquery.location` to specify another location."
       ]
     },
-    {
-      "cell_type": "code",
-      "execution_count": 4,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "pd_df = pd.DataFrame({\"1\": [1, 2], \"2\": [3, 4]})"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 9,
-      "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/html": [
-              "Load job c9eedc60-a8ca-4b5c-8043-fd38525b5126 is DONE. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:c9eedc60-a8ca-4b5c-8043-fd38525b5126&page=queryresults\">Open Job</a>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/google/home/garrettwu/.pyenv/versions/3.10.9/lib/python3.10/site-packages/ibis/backends/bigquery/__init__.py:499: FutureWarning: `database` is deprecated as of v7.1, removed in v8.0; The bigquery backend cannot return a table expression using only a `database` specifier. Include a `schema` argument.\n",
-            "  util.warn_deprecated(\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "Query job e9c219ee-880f-464a-9464-cb2dab20d498 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:e9c219ee-880f-464a-9464-cb2dab20d498&page=queryresults\">Open Job</a>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "Query job d852b9ab-3814-4fd0-91af-53c57152c027 is DONE. 64 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:d852b9ab-3814-4fd0-91af-53c57152c027&page=queryresults\">Open Job</a>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>1</th>\n",
-              "      <th>2</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>1</td>\n",
-              "      <td>3</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>2</td>\n",
-              "      <td>4</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "<p>2 rows × 2 columns</p>\n",
-              "</div>[2 rows x 2 columns in total]"
-            ],
-            "text/plain": [
-              "   1  2\n",
-              "0  1  3\n",
-              "1  2  4\n",
-              "\n",
-              "[2 rows x 2 columns]"
-            ]
-          },
-          "execution_count": 9,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "bf.read_pandas(pd_df)"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -1173,18 +973,6 @@
     "kernelspec": {
       "display_name": "Python 3",
       "name": "python3"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.10.9"
     }
   },
   "nbformat": 4,

From 3a8520a6167a2981495f5fadd4c324cf4687fbde Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Fri, 16 Feb 2024 21:15:56 +0000
Subject: [PATCH 04/10] fix tests

---
 tests/unit/session/test_io_bigquery.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py
index d610574efc..406de2b88e 100644
--- a/tests/unit/session/test_io_bigquery.py
+++ b/tests/unit/session/test_io_bigquery.py
@@ -23,6 +23,7 @@
 from bigframes.core import log_adapter
 import bigframes.pandas as bpd
 import bigframes.session._io.bigquery as io_bq
+from tests.unit import resources
 
 
 def test_create_job_configs_labels_is_none():
@@ -64,7 +65,9 @@ def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit():
         "bigframes-api": "read_pandas",
         "source": "bigquery-dataframes-temp",
     }
-    df = bpd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
+    df = bpd.DataFrame(
+        {"col1": [1, 2], "col2": [3, 4]}, session=resources.create_bigquery_session()
+    )
     # Test running two methods
     df.head()
     df.max()
@@ -88,7 +91,9 @@ def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit():
 
 def test_create_job_configs_labels_length_limit_met_and_labels_is_none():
     log_adapter.get_and_reset_api_methods()
-    df = bpd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
+    df = bpd.DataFrame(
+        {"col1": [1, 2], "col2": [3, 4]}, session=resources.create_bigquery_session()
+    )
     # Test running methods more than the labels' length limit
     for i in range(66):
         df.head()
@@ -113,7 +118,9 @@ def test_create_job_configs_labels_length_limit_met():
         value = f"test{i}"
         cur_labels[key] = value
     # If cur_labels length is 62, we can only add one label from api_methods
-    df = bpd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
+    df = bpd.DataFrame(
+        {"col1": [1, 2], "col2": [3, 4]}, session=resources.create_bigquery_session()
+    )
     # Test running two methods
     df.head()
     df.max()

From b2e003b383929042d7368bfc0f1dad6e1805107c Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Fri, 16 Feb 2024 21:25:15 +0000
Subject: [PATCH 05/10] fix tests

---
 bigframes/operations/base.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py
index 96134bc4b4..154247c033 100644
--- a/bigframes/operations/base.py
+++ b/bigframes/operations/base.py
@@ -30,10 +30,6 @@
 import bigframes.session
 import third_party.bigframes_vendored.pandas.pandas._typing as vendored_pandas_typing
 
-# BigQuery has 1 MB query size limit, 5000 items shouldn't take more than 10% of this depending on data type.
-# TODO(tbergeron): Convert to bytes-based limit
-MAX_INLINE_SERIES_SIZE = 5000
-
 
 class SeriesMethods:
     def __init__(

From 912af6e5ecb94a03de5fc4da75b0f0c4dc95d2e7 Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Tue, 20 Feb 2024 19:24:14 +0000
Subject: [PATCH 06/10] fix tests

---
 bigframes/session/__init__.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index b8aad20f5b..79f5323d6d 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -891,9 +891,12 @@ def _read_pandas(
             pandas_dataframe.size < MAX_INLINE_DF_SIZE
             # TODO(swast): Workaround data types limitation in inline data.
             and not any(
-                dt.pyarrow_dtype
-                for dt in pandas_dataframe.dtypes
-                if isinstance(dt, pandas.ArrowDtype)
+                (
+                    isinstance(s.dtype, pandas.ArrowDtype)
+                    or pandas.api.types.is_list_like(s)
+                    or pandas.api.types.is_datetime64_dtype(s)
+                )
+                for _, s in pandas_dataframe.items()
             )
         ):
             return self._read_pandas_inline(pandas_dataframe)

From b533641767d4045b78e2538fc5000884b5a74a1b Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Tue, 20 Feb 2024 21:24:21 +0000
Subject: [PATCH 07/10] fix tests

---
 bigframes/session/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 79f5323d6d..32f9702d3d 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -893,7 +893,7 @@ def _read_pandas(
             and not any(
                 (
                     isinstance(s.dtype, pandas.ArrowDtype)
-                    or pandas.api.types.is_list_like(s)
+                    or (len(s) > 0 and pandas.api.types.is_list_like(s[0]))
                     or pandas.api.types.is_datetime64_dtype(s)
                 )
                 for _, s in pandas_dataframe.items()

From 86f7eeba337d80e677c7a4402d2bc33fef09617a Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Tue, 20 Feb 2024 21:53:58 +0000
Subject: [PATCH 08/10] fix tests

---
 bigframes/session/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 32f9702d3d..e5706ab0a6 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -893,7 +893,7 @@ def _read_pandas(
             and not any(
                 (
                     isinstance(s.dtype, pandas.ArrowDtype)
-                    or (len(s) > 0 and pandas.api.types.is_list_like(s[0]))
+                    or (len(s) > 0 and pandas.api.types.is_list_like(s.iloc[0]))
                     or pandas.api.types.is_datetime64_dtype(s)
                 )
                 for _, s in pandas_dataframe.items()

From d373770ae027582b2b145a6ac485e0607b698257 Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Tue, 20 Feb 2024 23:08:30 +0000
Subject: [PATCH 09/10] fix tests

---
 bigframes/session/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index e5706ab0a6..20dd39c0fa 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -894,7 +894,7 @@ def _read_pandas(
                 (
                     isinstance(s.dtype, pandas.ArrowDtype)
                     or (len(s) > 0 and pandas.api.types.is_list_like(s.iloc[0]))
-                    or pandas.api.types.is_datetime64_dtype(s)
+                    or pandas.api.types.is_datetime64_any_dtype(s)
                 )
                 for _, s in pandas_dataframe.items()
             )

From abd65b97bf3f007b24acfc7dc8f8eba3e13b0a42 Mon Sep 17 00:00:00 2001
From: Garrett Wu <garrettwu@google.com>
Date: Tue, 20 Feb 2024 23:38:44 +0000
Subject: [PATCH 10/10] fix tests

---
 tests/system/small/test_progress_bar.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/system/small/test_progress_bar.py b/tests/system/small/test_progress_bar.py
index bd13ac2240..1c04b580fc 100644
--- a/tests/system/small/test_progress_bar.py
+++ b/tests/system/small/test_progress_bar.py
@@ -15,10 +15,12 @@
 import re
 import tempfile
 
+import numpy as np
 import pandas as pd
 
 import bigframes as bf
 import bigframes.formatting_helpers as formatting_helpers
+from bigframes.session import MAX_INLINE_DF_SIZE
 
 job_load_message_regex = r"\w+ job [\w-]+ is \w+\."
 
@@ -66,10 +68,15 @@ def test_progress_bar_extract_jobs(
 def test_progress_bar_load_jobs(
     session: bf.Session, penguins_pandas_df_default_index: pd.DataFrame, capsys
 ):
+    # Double the DataFrame's rows until it is big enough to trigger a load job.
+    df = penguins_pandas_df_default_index
+    while len(df) < MAX_INLINE_DF_SIZE:
+        df = pd.DataFrame(np.repeat(df.values, 2, axis=0))
+
     bf.options.display.progress_bar = "terminal"
     with tempfile.TemporaryDirectory() as dir:
         path = dir + "/test_read_csv_progress_bar*.csv"
-        penguins_pandas_df_default_index.to_csv(path, index=False)
+        df.to_csv(path, index=False)
         capsys.readouterr()  # clear output
         session.read_csv(path)