From 16b9bcfa65056d1da18481962249d7048b276b9b Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Tue, 24 Oct 2023 00:14:53 +0000
Subject: [PATCH 1/7] docs: link to ML.EVALUATE BQML page for score() methods

---
 bigframes/ml/ensemble.py                             | 10 ++++++++++
 bigframes/ml/forecasting.py                          |  5 +++++
 third_party/bigframes_vendored/sklearn/base.py       | 12 +++++++++++-
 .../bigframes_vendored/sklearn/cluster/_kmeans.py    |  8 ++++++--
 .../bigframes_vendored/sklearn/decomposition/_pca.py |  7 ++++++-
 5 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py
index 113ad872b5..764f00ed12 100644
--- a/bigframes/ml/ensemble.py
+++ b/bigframes/ml/ensemble.py
@@ -507,6 +507,11 @@ def score(
     ):
         """Calculate evaluation metrics of the model.
 
+        .. note::
+
+            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
+            for evaluating model metrics.
+
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 A BigQuery DataFrame as evaluation data.
@@ -676,6 +681,11 @@ def score(
     ):
         """Calculate evaluation metrics of the model.
 
+        .. note::
+
+            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
+            for evaluating model metrics.
+
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 A BigQuery DataFrame as evaluation data.
diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py
index 8a6de1dd81..b88518a843 100644
--- a/bigframes/ml/forecasting.py
+++ b/bigframes/ml/forecasting.py
@@ -112,6 +112,11 @@ def score(
     ) -> bpd.DataFrame:
         """Calculate evaluation metrics of the model.
 
+        .. note::
+
+            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
+            for evaluating model metrics.
+
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 A BigQuery DataFrame only contains 1 column as
diff --git a/third_party/bigframes_vendored/sklearn/base.py b/third_party/bigframes_vendored/sklearn/base.py
index 42868ce51f..4d039be60d 100644
--- a/third_party/bigframes_vendored/sklearn/base.py
+++ b/third_party/bigframes_vendored/sklearn/base.py
@@ -85,6 +85,11 @@ def score(self, X, y):
         which is a harsh metric since you require for each sample that
         each label set be correctly predicted.
 
+        .. note::
+
+            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
+            for evaluating model metrics.
+
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 DataFrame of shape (n_samples, n_features). Test samples.
@@ -105,7 +110,12 @@ class RegressorMixin:
     _estimator_type = "regressor"
 
     def score(self, X, y):
-        """Return the evaluation metrics of the model.
+        """Calculate evaluation metrics of the model.
+
+        .. note::
+
+            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
+            for evaluating model metrics.
 
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
diff --git a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py
index ece62dc147..7b22bb4560 100644
--- a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py
+++ b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py
@@ -12,7 +12,6 @@
 # License: BSD 3 clause
 
 from abc import ABC
-from typing import List, Optional
 
 from bigframes import constants
 from third_party.bigframes_vendored.sklearn.base import BaseEstimator
@@ -83,7 +82,12 @@ def score(
         X,
         y=None,
     ):
-        """Metrics of the model.
+        """Calculate evaluation metrics of the model.
+
+        .. note::
+
+            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
+            for evaluating model metrics.
 
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py
index 97fee5a501..0326a10c2d 100644
--- a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py
+++ b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py
@@ -55,7 +55,12 @@ def fit(self, X, y=None):
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
     def score(self, X=None, y=None):
-        """Return the metrics of the model.
+        """Calculate evaluation metrics of the model.
+
+        .. note::
+
+            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
+            for evaluating model metrics.
 
         Args:
             X (default None):

From 10c30cb47fa28970ff80e6c4e5a0ba3c4ba10d16 Mon Sep 17 00:00:00 2001
From: Henry Solberg <henry.j.solberg@gmail.com>
Date: Wed, 25 Oct 2023 15:52:16 -0700
Subject: [PATCH 2/7] test: allow for alternative PCA solutions in tests (#143)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 tests/system/large/ml/test_decomposition.py |  4 ++--
 tests/system/large/ml/test_pipeline.py      | 14 ++++++++++----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/tests/system/large/ml/test_decomposition.py b/tests/system/large/ml/test_decomposition.py
index 460f07b816..a7049d4c18 100644
--- a/tests/system/large/ml/test_decomposition.py
+++ b/tests/system/large/ml/test_decomposition.py
@@ -67,8 +67,8 @@ def test_decomposition_configure_fit_score_predict(
         index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
     )
     pd.testing.assert_frame_equal(
-        result.sort_index(),
-        expected,
+        abs(result.sort_index()),  # results may differ by a minus sign
+        abs(expected),
         check_exact=False,
         rtol=0.1,
     )
diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py
index 9294740dd6..6874a9f301 100644
--- a/tests/system/large/ml/test_pipeline.py
+++ b/tests/system/large/ml/test_pipeline.py
@@ -431,10 +431,16 @@ def test_pipeline_PCA_fit_score_predict(session, penguins_df_default_index):
         index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
     )
     pd.testing.assert_frame_equal(
-        predictions[
-            ["principal_component_1", "principal_component_2", "principal_component_3"]
-        ],
-        expected,
+        abs(  # results may differ by a minus sign
+            predictions[
+                [
+                    "principal_component_1",
+                    "principal_component_2",
+                    "principal_component_3",
+                ]
+            ]
+        ),
+        abs(expected),
         check_exact=False,
         rtol=0.1,
     )

From 5ab92f054d9dd370ecc95f44a8685ffa61b0a798 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Thu, 26 Oct 2023 00:16:15 +0000
Subject: [PATCH 3/7] ci: Disable presubmit LLM tests temporarily (#144)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 tests/system/small/ml/test_llm.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py
index b7257dde1b..a801c36c83 100644
--- a/tests/system/small/ml/test_llm.py
+++ b/tests/system/small/ml/test_llm.py
@@ -26,6 +26,9 @@ def test_create_text_generator_model(palm2_text_generator_model):
     assert palm2_text_generator_model._bqml_model is not None
 
 
+@pytest.mark.skip(
+    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
+)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_create_text_generator_model_default_session(bq_connection, llm_text_pandas_df):
     import bigframes.pandas as bpd
@@ -48,6 +51,9 @@ def test_create_text_generator_model_default_session(bq_connection, llm_text_pan
     assert all(series.str.len() > 20)
 
 
+@pytest.mark.skip(
+    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
+)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_create_text_generator_model_default_connection(llm_text_pandas_df):
     from bigframes import _config
@@ -74,6 +80,9 @@ def test_create_text_generator_model_default_connection(llm_text_pandas_df):
 
 
 # Marked as flaky only because BQML LLM is in preview, the service only has limited capacity, not stable enough.
+@pytest.mark.skip(
+    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
+)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_text_generator_predict_default_params_success(
     palm2_text_generator_model, llm_text_df
@@ -85,6 +94,9 @@ def test_text_generator_predict_default_params_success(
     assert all(series.str.len() > 20)
 
 
+@pytest.mark.skip(
+    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
+)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_text_generator_predict_series_default_params_success(
     palm2_text_generator_model, llm_text_df
@@ -96,6 +108,9 @@ def test_text_generator_predict_series_default_params_success(
     assert all(series.str.len() > 20)
 
 
+@pytest.mark.skip(
+    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
+)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_text_generator_predict_arbitrary_col_label_success(
     palm2_text_generator_model, llm_text_df
@@ -108,6 +123,9 @@ def test_text_generator_predict_arbitrary_col_label_success(
     assert all(series.str.len() > 20)
 
 
+@pytest.mark.skip(
+    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
+)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_text_generator_predict_with_params_success(
     palm2_text_generator_model, llm_text_df
@@ -139,6 +157,9 @@ def test_create_text_embedding_generator_model_defaults(bq_connection):
     assert model._bqml_model is not None
 
 
+@pytest.mark.skip(
+    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
+)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_embedding_generator_predict_success(
     palm2_embedding_generator_model, llm_text_df
@@ -152,6 +173,9 @@ def test_embedding_generator_predict_success(
     assert value.size == 768
 
 
+@pytest.mark.skip(
+    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
+)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_embedding_generator_predict_series_success(
     palm2_embedding_generator_model, llm_text_df

From b66d1a1be33ff8c32f9b11ef87a0aaf2809a889f Mon Sep 17 00:00:00 2001
From: "release-please[bot]"
 <55107282+release-please[bot]@users.noreply.github.com>
Date: Wed, 25 Oct 2023 19:39:22 -0700
Subject: [PATCH 4/7] chore(main): release 0.11.0 (#126)

Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
---
 CHANGELOG.md         | 20 ++++++++++++++++++++
 bigframes/version.py |  2 +-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4d9f63d4c6..93ebadb56f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,26 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [0.11.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.10.0...v0.11.0) (2023-10-26)
+
+
+### Features
+
+* Add back `reset_session` as an alias for `close_session` ([#124](https://github.com/googleapis/python-bigquery-dataframes/issues/124)) ([694a85a](https://github.com/googleapis/python-bigquery-dataframes/commit/694a85a0ef90d838700014a204d72b23362db1d8))
+* Change `query` parameter to `query_or_table` in `read_gbq` ([#127](https://github.com/googleapis/python-bigquery-dataframes/issues/127)) ([f9bb3c4](https://github.com/googleapis/python-bigquery-dataframes/commit/f9bb3c4bc88c5ba2be6f17e12a0ec4f482ce161f))
+
+
+### Bug Fixes
+
+* Expose `bigframes.pandas.reset_session` as a public API ([#128](https://github.com/googleapis/python-bigquery-dataframes/issues/128)) ([b17e1f4](https://github.com/googleapis/python-bigquery-dataframes/commit/b17e1f43cd0f7567bc5b59b0e916cd20528312b3))
+* Use series's own session in series.reindex listlike case ([#135](https://github.com/googleapis/python-bigquery-dataframes/issues/135)) ([95bff3f](https://github.com/googleapis/python-bigquery-dataframes/commit/95bff3f1902bc09dc3310798a42df8ffd31ed8ee))
+
+
+### Documentation
+
+* Add runnable code samples for DataFrames I/O methods and property ([#129](https://github.com/googleapis/python-bigquery-dataframes/issues/129)) ([6fea8ef](https://github.com/googleapis/python-bigquery-dataframes/commit/6fea8efac35871985677ebeb948a576e64a1ffa4))
+* Add runnable code samples for reading methods ([#125](https://github.com/googleapis/python-bigquery-dataframes/issues/125)) ([a669919](https://github.com/googleapis/python-bigquery-dataframes/commit/a669919ff25b56156bd70ccd816a0bf19adb48aa))
+
 ## [0.10.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.9.0...v0.10.0) (2023-10-19)
 
 
diff --git a/bigframes/version.py b/bigframes/version.py
index 7a37ebd220..18edfa5615 100644
--- a/bigframes/version.py
+++ b/bigframes/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.10.0"
+__version__ = "0.11.0"

From e80abdb70b5d154648cba7eeb38393958ab533ad Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Thu, 26 Oct 2023 05:12:13 +0000
Subject: [PATCH 5/7] Revert "ci: Disable presubmit LLM tests temporarily
 (#144)" (#148)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 1641aff37d601b47e0bc4f25ff148be4f718bd1a, which was merged due to automerge label while still being discussed.

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 tests/system/small/ml/test_llm.py | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py
index a801c36c83..b7257dde1b 100644
--- a/tests/system/small/ml/test_llm.py
+++ b/tests/system/small/ml/test_llm.py
@@ -26,9 +26,6 @@ def test_create_text_generator_model(palm2_text_generator_model):
     assert palm2_text_generator_model._bqml_model is not None
 
 
-@pytest.mark.skip(
-    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
-)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_create_text_generator_model_default_session(bq_connection, llm_text_pandas_df):
     import bigframes.pandas as bpd
@@ -51,9 +48,6 @@ def test_create_text_generator_model_default_session(bq_connection, llm_text_pan
     assert all(series.str.len() > 20)
 
 
-@pytest.mark.skip(
-    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
-)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_create_text_generator_model_default_connection(llm_text_pandas_df):
     from bigframes import _config
@@ -80,9 +74,6 @@ def test_create_text_generator_model_default_connection(llm_text_pandas_df):
 
 
 # Marked as flaky only because BQML LLM is in preview, the service only has limited capacity, not stable enough.
-@pytest.mark.skip(
-    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
-)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_text_generator_predict_default_params_success(
     palm2_text_generator_model, llm_text_df
@@ -94,9 +85,6 @@ def test_text_generator_predict_default_params_success(
     assert all(series.str.len() > 20)
 
 
-@pytest.mark.skip(
-    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
-)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_text_generator_predict_series_default_params_success(
     palm2_text_generator_model, llm_text_df
@@ -108,9 +96,6 @@ def test_text_generator_predict_series_default_params_success(
     assert all(series.str.len() > 20)
 
 
-@pytest.mark.skip(
-    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
-)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_text_generator_predict_arbitrary_col_label_success(
     palm2_text_generator_model, llm_text_df
@@ -123,9 +108,6 @@ def test_text_generator_predict_arbitrary_col_label_success(
     assert all(series.str.len() > 20)
 
 
-@pytest.mark.skip(
-    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
-)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_text_generator_predict_with_params_success(
     palm2_text_generator_model, llm_text_df
@@ -157,9 +139,6 @@ def test_create_text_embedding_generator_model_defaults(bq_connection):
     assert model._bqml_model is not None
 
 
-@pytest.mark.skip(
-    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
-)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_embedding_generator_predict_success(
     palm2_embedding_generator_model, llm_text_df
@@ -173,9 +152,6 @@ def test_embedding_generator_predict_success(
     assert value.size == 768
 
 
-@pytest.mark.skip(
-    reason="Temporarily disable to validate the hypothesis that LLM capacity is causing the presubmit tests to take long to run."
-)
 @pytest.mark.flaky(retries=2, delay=120)
 def test_embedding_generator_predict_series_success(
     palm2_embedding_generator_model, llm_text_df

From ef41bc7e5d774a4231fadbaae5831aea82fa9f01 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Thu, 26 Oct 2023 11:48:14 -0500
Subject: [PATCH 6/7] refactor: make `to_pandas()` call `to_arrow()` and use
 local dtypes in DataFrame construction (#132)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Towards internal issue 280662868
🦕
---
 bigframes/core/blocks.py             |  41 +---
 bigframes/core/indexes/index.py      |   3 +-
 bigframes/dtypes.py                  |   6 +
 bigframes/session/__init__.py        |  10 +-
 bigframes/session/_io/pandas.py      |  77 +++++++
 tests/system/small/test_dataframe.py |  10 -
 tests/system/small/test_series.py    |  48 ++++-
 tests/unit/session/test_io_pandas.py | 296 +++++++++++++++++++++++++++
 tests/unit/test_dtypes.py            |  57 +++---
 9 files changed, 457 insertions(+), 91 deletions(-)
 create mode 100644 bigframes/session/_io/pandas.py
 create mode 100644 tests/unit/session/test_io_pandas.py

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 046d2b3a44..eab4645477 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -28,11 +28,8 @@
 from typing import Iterable, List, Optional, Sequence, Tuple
 import warnings
 
-import geopandas as gpd  # type: ignore
 import google.cloud.bigquery as bigquery
-import numpy
 import pandas as pd
-import pyarrow as pa  # type: ignore
 
 import bigframes.constants as constants
 import bigframes.core as core
@@ -46,6 +43,7 @@
 import bigframes.dtypes
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
+import bigframes.session._io.pandas
 import third_party.bigframes_vendored.pandas.io.common as vendored_pandas_io_common
 
 # Type constraint for wherever column labels are used
@@ -372,34 +370,11 @@ def reorder_levels(self, ids: typing.Sequence[str]):
         level_names = [self.col_id_to_index_name[index_id] for index_id in ids]
         return Block(self.expr, ids, self.column_labels, level_names)
 
-    @classmethod
-    def _to_dataframe(
-        cls, result, schema: typing.Mapping[str, bigframes.dtypes.Dtype]
-    ) -> pd.DataFrame:
+    def _to_dataframe(self, result) -> pd.DataFrame:
         """Convert BigQuery data to pandas DataFrame with specific dtypes."""
-        dtypes = bigframes.dtypes.to_pandas_dtypes_overrides(result.schema)
-        df = result.to_dataframe(
-            dtypes=dtypes,
-            bool_dtype=pd.BooleanDtype(),
-            int_dtype=pd.Int64Dtype(),
-            float_dtype=pd.Float64Dtype(),
-            string_dtype=pd.StringDtype(storage="pyarrow"),
-            date_dtype=pd.ArrowDtype(pa.date32()),
-            datetime_dtype=pd.ArrowDtype(pa.timestamp("us")),
-            time_dtype=pd.ArrowDtype(pa.time64("us")),
-            timestamp_dtype=pd.ArrowDtype(pa.timestamp("us", tz="UTC")),
-        )
-
-        # Convert Geography column from StringDType to GeometryDtype.
-        for column_name, dtype in schema.items():
-            if dtype == gpd.array.GeometryDtype():
-                df[column_name] = gpd.GeoSeries.from_wkt(
-                    # https://github.com/geopandas/geopandas/issues/1879
-                    df[column_name].replace({numpy.nan: None}),
-                    # BigQuery geography type is based on the WGS84 reference ellipsoid.
-                    crs="EPSG:4326",
-                )
-        return df
+        dtypes = dict(zip(self.index_columns, self.index_dtypes))
+        dtypes.update(zip(self.value_columns, self.dtypes))
+        return self._expr._session._rows_to_dataframe(result, dtypes)
 
     def to_pandas(
         self,
@@ -480,8 +455,7 @@ def _compute_and_count(
             if sampling_method == _HEAD:
                 total_rows = int(results_iterator.total_rows * fraction)
                 results_iterator.max_results = total_rows
-                schema = dict(zip(self.value_columns, self.dtypes))
-                df = self._to_dataframe(results_iterator, schema)
+                df = self._to_dataframe(results_iterator)
 
                 if self.index_columns:
                     df.set_index(list(self.index_columns), inplace=True)
@@ -510,8 +484,7 @@ def _compute_and_count(
                 )
         else:
             total_rows = results_iterator.total_rows
-            schema = dict(zip(self.value_columns, self.dtypes))
-            df = self._to_dataframe(results_iterator, schema)
+            df = self._to_dataframe(results_iterator)
 
             if self.index_columns:
                 df.set_index(list(self.index_columns), inplace=True)
diff --git a/bigframes/core/indexes/index.py b/bigframes/core/indexes/index.py
index 677bb8529c..b9ffdff21e 100644
--- a/bigframes/core/indexes/index.py
+++ b/bigframes/core/indexes/index.py
@@ -399,9 +399,10 @@ def to_pandas(self) -> pandas.Index:
         """Executes deferred operations and downloads the results."""
         # Project down to only the index column. So the query can be cached to visualize other data.
         index_columns = list(self._block.index_columns)
+        dtypes = dict(zip(index_columns, self.dtypes))
         expr = self._expr.select_columns(index_columns)
         results, _ = expr.start_query()
-        df = expr._session._rows_to_dataframe(results)
+        df = expr._session._rows_to_dataframe(results, dtypes)
         df = df.set_index(index_columns)
         index = df.index
         index.names = list(self._block._index_labels)
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index da221a95ac..079f0cc27a 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -169,6 +169,10 @@ def ibis_dtype_to_bigframes_dtype(
     if isinstance(ibis_dtype, ibis_dtypes.Struct):
         return pd.ArrowDtype(ibis_dtype_to_arrow_dtype(ibis_dtype))
 
+    # BigQuery only supports integers of size 64 bits.
+    if isinstance(ibis_dtype, ibis_dtypes.Integer):
+        return pd.Int64Dtype()
+
     if ibis_dtype in IBIS_TO_BIGFRAMES:
         return IBIS_TO_BIGFRAMES[ibis_dtype]
     elif isinstance(ibis_dtype, ibis_dtypes.Null):
@@ -372,6 +376,8 @@ def cast_ibis_value(
         ibis_dtypes.float64: (ibis_dtypes.string, ibis_dtypes.int64),
         ibis_dtypes.string: (ibis_dtypes.int64, ibis_dtypes.float64),
         ibis_dtypes.date: (),
+        ibis_dtypes.Decimal(precision=38, scale=9): (ibis_dtypes.float64,),
+        ibis_dtypes.Decimal(precision=76, scale=38): (ibis_dtypes.float64,),
         ibis_dtypes.time: (),
         ibis_dtypes.timestamp: (ibis_dtypes.Timestamp(timezone="UTC"),),
         ibis_dtypes.Timestamp(timezone="UTC"): (ibis_dtypes.timestamp,),
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index db9c5a353c..af1f70d54d 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1515,14 +1515,10 @@ def _get_table_size(self, destination_table):
         return table.num_bytes
 
     def _rows_to_dataframe(
-        self, row_iterator: bigquery.table.RowIterator
+        self, row_iterator: bigquery.table.RowIterator, dtypes: Dict
     ) -> pandas.DataFrame:
-        return row_iterator.to_dataframe(
-            bool_dtype=pandas.BooleanDtype(),
-            int_dtype=pandas.Int64Dtype(),
-            float_dtype=pandas.Float64Dtype(),
-            string_dtype=pandas.StringDtype(storage="pyarrow"),
-        )
+        arrow_table = row_iterator.to_arrow()
+        return bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes)
 
     def _start_generic_job(self, job: formatting_helpers.GenericJob):
         if bigframes.options.display.progress_bar is not None:
diff --git a/bigframes/session/_io/pandas.py b/bigframes/session/_io/pandas.py
new file mode 100644
index 0000000000..163127b546
--- /dev/null
+++ b/bigframes/session/_io/pandas.py
@@ -0,0 +1,77 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Union
+
+import geopandas  # type: ignore
+import pandas
+import pandas.arrays
+import pyarrow  # type: ignore
+import pyarrow.compute  # type: ignore
+
+import bigframes.constants
+
+
+def arrow_to_pandas(
+    arrow_table: Union[pyarrow.Table, pyarrow.RecordBatch], dtypes: Dict
+):
+    if len(dtypes) != arrow_table.num_columns:
+        raise ValueError(
+            f"Number of types {len(dtypes)} doesn't match number of columns "
+            f"{arrow_table.num_columns}. {bigframes.constants.FEEDBACK_LINK}"
+        )
+
+    serieses = {}
+    for field, column in zip(arrow_table.schema, arrow_table):
+        dtype = dtypes[field.name]
+
+        if dtype == geopandas.array.GeometryDtype():
+            series = geopandas.GeoSeries.from_wkt(
+                column,
+                # BigQuery geography type is based on the WGS84 reference ellipsoid.
+                crs="EPSG:4326",
+            )
+        elif dtype == pandas.Float64Dtype():
+            # Preserve NA/NaN distinction. Note: This is currently needed, even if we use
+            # nullable Float64Dtype in the types_mapper. See:
+            # https://github.com/pandas-dev/pandas/issues/55668
+            # Regarding type: ignore, this class has been public at this
+            # location since pandas 1.2.0. See:
+            # https://pandas.pydata.org/docs/dev/reference/api/pandas.arrays.FloatingArray.html
+            pd_array = pandas.arrays.FloatingArray(  # type: ignore
+                column.to_numpy(),
+                pyarrow.compute.is_null(column).to_numpy(),
+            )
+            series = pandas.Series(pd_array, dtype=dtype)
+        elif dtype == pandas.Int64Dtype():
+            # Avoid out-of-bounds errors in Pandas 1.5.x, which incorrectly
+            # casts to float64 in an intermediate step.
+            pd_array = pandas.arrays.IntegerArray(
+                pyarrow.compute.fill_null(column, 0).to_numpy(),
+                pyarrow.compute.is_null(column).to_numpy(),
+            )
+            series = pandas.Series(pd_array, dtype=dtype)
+        elif isinstance(dtype, pandas.ArrowDtype):
+            # Avoid conversion logic if we are backing the pandas Series by the
+            # arrow array.
+            series = pandas.Series(
+                pandas.arrays.ArrowExtensionArray(column),  # type: ignore
+                dtype=dtype,
+            )
+        else:
+            series = column.to_pandas(types_mapper=lambda _: dtype)
+
+        serieses[field.name] = series
+
+    return pandas.DataFrame(serieses)
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 19e50eb06d..84e8def83b 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -2046,16 +2046,6 @@ def test__dir__with_rename(scalars_dfs):
 def test_iloc_slice(scalars_df_index, scalars_pandas_df_index, start, stop, step):
     bf_result = scalars_df_index.iloc[start:stop:step].to_pandas()
     pd_result = scalars_pandas_df_index.iloc[start:stop:step]
-
-    # Pandas may assign non-object dtype to empty series and series index
-    # dtypes of empty columns are a known area of divergence from pandas
-    for column in pd_result.columns:
-        if (
-            pd_result[column].empty and column != "geography_col"
-        ):  # for empty geography_col, bigframes assigns non-object dtype
-            pd_result[column] = pd_result[column].astype("object")
-            pd_result.index = pd_result.index.astype("object")
-
     pd.testing.assert_frame_equal(
         bf_result,
         pd_result,
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index bd9edbb1ca..c9510290b6 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -575,7 +575,15 @@ def test_series_int_int_operators_series(scalars_dfs, operator):
 )
 def test_mods(scalars_dfs, col_x, col_y, method):
     scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = getattr(scalars_df[col_x], method)(scalars_df[col_y]).to_pandas()
+    x_bf = scalars_df[col_x]
+    y_bf = scalars_df[col_y]
+    bf_series = getattr(x_bf, method)(y_bf)
+    # BigQuery's mod functions return [BIG]NUMERIC values unless both arguments are integers.
+    # https://cloud.google.com/bigquery/docs/reference/standard-sql/mathematical_functions#mod
+    if x_bf.dtype == pd.Int64Dtype() and y_bf.dtype == pd.Int64Dtype():
+        bf_result = bf_series.to_pandas()
+    else:
+        bf_result = bf_series.astype("Float64").to_pandas()
     pd_result = getattr(scalars_pandas_df[col_x], method)(scalars_pandas_df[col_y])
     pd.testing.assert_series_equal(pd_result, bf_result)
 
@@ -620,8 +628,20 @@ def test_divmods_series(scalars_dfs, col_x, col_y, method):
     pd_div_result, pd_mod_result = getattr(scalars_pandas_df[col_x], method)(
         scalars_pandas_df[col_y]
     )
-    pd.testing.assert_series_equal(pd_div_result, bf_div_result.to_pandas())
-    pd.testing.assert_series_equal(pd_mod_result, bf_mod_result.to_pandas())
+    # BigQuery's mod functions return NUMERIC values for non-INT64 inputs.
+    if bf_div_result.dtype == pd.Int64Dtype():
+        pd.testing.assert_series_equal(pd_div_result, bf_div_result.to_pandas())
+    else:
+        pd.testing.assert_series_equal(
+            pd_div_result, bf_div_result.astype("Float64").to_pandas()
+        )
+
+    if bf_mod_result.dtype == pd.Int64Dtype():
+        pd.testing.assert_series_equal(pd_mod_result, bf_mod_result.to_pandas())
+    else:
+        pd.testing.assert_series_equal(
+            pd_mod_result, bf_mod_result.astype("Float64").to_pandas()
+        )
 
 
 @pytest.mark.parametrize(
@@ -649,8 +669,20 @@ def test_divmods_scalars(scalars_dfs, col_x, other, method):
     scalars_df, scalars_pandas_df = scalars_dfs
     bf_div_result, bf_mod_result = getattr(scalars_df[col_x], method)(other)
     pd_div_result, pd_mod_result = getattr(scalars_pandas_df[col_x], method)(other)
-    pd.testing.assert_series_equal(pd_div_result, bf_div_result.to_pandas())
-    pd.testing.assert_series_equal(pd_mod_result, bf_mod_result.to_pandas())
+    # BigQuery's mod functions return NUMERIC values for non-INT64 inputs.
+    if bf_div_result.dtype == pd.Int64Dtype():
+        pd.testing.assert_series_equal(pd_div_result, bf_div_result.to_pandas())
+    else:
+        pd.testing.assert_series_equal(
+            pd_div_result, bf_div_result.astype("Float64").to_pandas()
+        )
+
+    if bf_mod_result.dtype == pd.Int64Dtype():
+        pd.testing.assert_series_equal(pd_mod_result, bf_mod_result.to_pandas())
+    else:
+        pd.testing.assert_series_equal(
+            pd_mod_result, bf_mod_result.astype("Float64").to_pandas()
+        )
 
 
 @pytest.mark.parametrize(
@@ -1941,12 +1973,6 @@ def test_iloc_nested(scalars_df_index, scalars_pandas_df_index):
 def test_series_iloc(scalars_df_index, scalars_pandas_df_index, start, stop, step):
     bf_result = scalars_df_index["string_col"].iloc[start:stop:step].to_pandas()
     pd_result = scalars_pandas_df_index["string_col"].iloc[start:stop:step]
-
-    # Pandas may assign non-object dtype to empty series and series index
-    if pd_result.empty:
-        pd_result = pd_result.astype("object")
-        pd_result.index = pd_result.index.astype("object")
-
     pd.testing.assert_series_equal(
         bf_result,
         pd_result,
diff --git a/tests/unit/session/test_io_pandas.py b/tests/unit/session/test_io_pandas.py
new file mode 100644
index 0000000000..8b95977ec3
--- /dev/null
+++ b/tests/unit/session/test_io_pandas.py
@@ -0,0 +1,296 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+from typing import Dict, Union
+
+import geopandas  # type: ignore
+import numpy
+import pandas
+import pandas.arrays
+import pandas.testing
+import pyarrow  # type: ignore
+import pytest
+
+import bigframes.session._io.pandas
+
+
+@pytest.mark.parametrize(
+    ("arrow_table", "dtypes", "expected"),
+    (
+        pytest.param(
+            pyarrow.Table.from_pydict({}),
+            {},
+            pandas.DataFrame(),
+            id="empty-df",
+        ),
+        pytest.param(
+            pyarrow.Table.from_pydict(
+                {
+                    "bool": pyarrow.array([None, None, None], type=pyarrow.bool_()),
+                    "float": pyarrow.array([None, None, None], type=pyarrow.float64()),
+                    "int": pyarrow.array([None, None, None], type=pyarrow.int64()),
+                    "string": pyarrow.array([None, None, None], type=pyarrow.string()),
+                    "time": pyarrow.array(
+                        [None, None, None], type=pyarrow.time64("us")
+                    ),
+                }
+            ),
+            {
+                "bool": "boolean",
+                "float": pandas.Float64Dtype(),
+                "int": pandas.Int64Dtype(),
+                "string": "string[pyarrow]",
+                "time": pandas.ArrowDtype(pyarrow.time64("us")),
+            },
+            pandas.DataFrame(
+                {
+                    "bool": pandas.Series([None, None, None], dtype="boolean"),
+                    "float": pandas.Series(
+                        pandas.arrays.FloatingArray(  # type: ignore
+                            numpy.array(
+                                [float("nan"), float("nan"), float("nan")],
+                                dtype="float64",
+                            ),
+                            numpy.array([True, True, True], dtype="bool"),
+                        ),
+                        dtype=pandas.Float64Dtype(),
+                    ),
+                    "int": pandas.Series(
+                        [None, None, None],
+                        dtype=pandas.Int64Dtype(),
+                    ),
+                    "string": pandas.Series(
+                        [None, None, None], dtype="string[pyarrow]"
+                    ),
+                    "time": pandas.Series(
+                        [
+                            None,
+                            None,
+                            None,
+                        ],
+                        dtype=pandas.ArrowDtype(pyarrow.time64("us")),
+                    ),
+                }
+            ),
+            id="nulls-df",
+        ),
+        pytest.param(
+            pyarrow.Table.from_pydict(
+                {
+                    "date": pyarrow.array(
+                        [
+                            datetime.date(2023, 8, 29),
+                            None,
+                            datetime.date(2024, 4, 9),
+                            datetime.date(1, 1, 1),
+                        ],
+                        type=pyarrow.date32(),
+                    ),
+                    "datetime": pyarrow.array(
+                        [
+                            datetime.datetime(2023, 8, 29),
+                            None,
+                            datetime.datetime(2024, 4, 9, 23, 59, 59),
+                            datetime.datetime(1, 1, 1, 0, 0, 0, 1),
+                        ],
+                        type=pyarrow.timestamp("us"),
+                    ),
+                    "string": ["123", None, "abc", "xyz"],
+                    "time": pyarrow.array(
+                        [
+                            datetime.time(0, 0, 0, 1),
+                            datetime.time(12, 0, 0),
+                            None,
+                            datetime.time(23, 59, 59, 999999),
+                        ],
+                        type=pyarrow.time64("us"),
+                    ),
+                    "timestamp": pyarrow.array(
+                        [
+                            datetime.datetime(2023, 8, 29),
+                            datetime.datetime(1, 1, 1, 0, 0, 0, 1),
+                            None,
+                            datetime.datetime(2024, 4, 9, 23, 59, 59),
+                        ],
+                        type=pyarrow.timestamp("us", datetime.timezone.utc),
+                    ),
+                }
+            ),
+            {
+                "date": pandas.ArrowDtype(pyarrow.date32()),
+                "datetime": pandas.ArrowDtype(pyarrow.timestamp("us")),
+                "string": "string[pyarrow]",
+                "time": pandas.ArrowDtype(pyarrow.time64("us")),
+                "timestamp": pandas.ArrowDtype(
+                    pyarrow.timestamp("us", datetime.timezone.utc)
+                ),
+            },
+            pandas.DataFrame(
+                {
+                    "date": pandas.Series(
+                        [
+                            datetime.date(2023, 8, 29),
+                            None,
+                            datetime.date(2024, 4, 9),
+                            datetime.date(1, 1, 1),
+                        ],
+                        dtype=pandas.ArrowDtype(pyarrow.date32()),
+                    ),
+                    "datetime": pandas.Series(
+                        [
+                            datetime.datetime(2023, 8, 29),
+                            None,
+                            datetime.datetime(2024, 4, 9, 23, 59, 59),
+                            datetime.datetime(1, 1, 1, 0, 0, 0, 1),
+                        ],
+                        dtype=pandas.ArrowDtype(pyarrow.timestamp("us")),
+                    ),
+                    "string": pandas.Series(
+                        ["123", None, "abc", "xyz"], dtype="string[pyarrow]"
+                    ),
+                    "time": pandas.Series(
+                        [
+                            datetime.time(0, 0, 0, 1),
+                            datetime.time(12, 0, 0),
+                            None,
+                            datetime.time(23, 59, 59, 999999),
+                        ],
+                        dtype=pandas.ArrowDtype(pyarrow.time64("us")),
+                    ),
+                    "timestamp": pandas.Series(
+                        [
+                            datetime.datetime(2023, 8, 29),
+                            datetime.datetime(1, 1, 1, 0, 0, 0, 1),
+                            None,
+                            datetime.datetime(2024, 4, 9, 23, 59, 59),
+                        ],
+                        dtype=pandas.ArrowDtype(
+                            pyarrow.timestamp("us", datetime.timezone.utc)
+                        ),
+                    ),
+                }
+            ),
+            id="arrow-dtypes",
+        ),
+        pytest.param(
+            pyarrow.Table.from_pydict(
+                {
+                    "bool": [True, None, True, False],
+                    "bytes": [b"123", None, b"abc", b"xyz"],
+                    "float": pyarrow.array(
+                        [1.0, None, float("nan"), -1.0],
+                        type=pyarrow.float64(),
+                    ),
+                    "int": pyarrow.array(
+                        [1, None, -1, 2**63 - 1],
+                        type=pyarrow.int64(),
+                    ),
+                    "string": ["123", None, "abc", "xyz"],
+                }
+            ),
+            {
+                "bool": "boolean",
+                "bytes": "object",
+                "float": pandas.Float64Dtype(),
+                "int": pandas.Int64Dtype(),
+                "string": "string[pyarrow]",
+            },
+            pandas.DataFrame(
+                {
+                    "bool": pandas.Series([True, None, True, False], dtype="boolean"),
+                    "bytes": [b"123", None, b"abc", b"xyz"],
+                    "float": pandas.Series(
+                        pandas.arrays.FloatingArray(  # type: ignore
+                            numpy.array(
+                                [1.0, float("nan"), float("nan"), -1.0], dtype="float64"
+                            ),
+                            numpy.array([False, True, False, False], dtype="bool"),
+                        ),
+                        dtype=pandas.Float64Dtype(),
+                    ),
+                    "int": pandas.Series(
+                        [1, None, -1, 2**63 - 1],
+                        dtype=pandas.Int64Dtype(),
+                    ),
+                    "string": pandas.Series(
+                        ["123", None, "abc", "xyz"], dtype="string[pyarrow]"
+                    ),
+                }
+            ),
+            id="scalar-dtypes",
+        ),
+        pytest.param(
+            pyarrow.Table.from_pydict(
+                {
+                    "geocol": [
+                        "POINT(32 210)",
+                        None,
+                        "LINESTRING(1 1, 2 1, 3.1 2.88, 3 -3)",
+                    ]
+                }
+            ),
+            {"geocol": geopandas.array.GeometryDtype()},
+            pandas.DataFrame(
+                {
+                    "geocol": geopandas.GeoSeries.from_wkt(
+                        ["POINT(32 210)", None, "LINESTRING(1 1, 2 1, 3.1 2.88, 3 -3)"],
+                        crs="EPSG:4326",
+                    ),
+                }
+            ),
+            id="geography-dtype",
+        ),
+    ),
+)
+def test_arrow_to_pandas(
+    arrow_table: Union[pyarrow.Table, pyarrow.RecordBatch],
+    dtypes: Dict,
+    expected: pandas.DataFrame,
+):
+    actual = bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes)
+    pandas.testing.assert_series_equal(actual.dtypes, expected.dtypes)
+
+    # assert_frame_equal is converting to numpy internally, which causes some
+    # loss of precision with the extreme values in this test.
+    for column in actual.columns:
+        assert tuple(
+            (index, value) if (value is pandas.NA or value == value) else (index, "nan")
+            for index, value in actual[column].items()
+        ) == tuple(
+            (index, value) if (value is pandas.NA or value == value) else (index, "nan")
+            for index, value in expected[column].items()
+        )
+
+
+@pytest.mark.parametrize(
+    ("arrow_table", "dtypes"),
+    (
+        pytest.param(
+            pyarrow.Table.from_pydict({"col1": [1], "col2": [2]}),
+            {"col1": "Int64"},
+            id="too-few-dtypes",
+        ),
+        pytest.param(
+            pyarrow.RecordBatch.from_pydict({"col1": [1]}),
+            {"col1": "Int64", "col2": "string[pyarrow]"},
+            id="too-many-dtypes",
+        ),
+    ),
+)
+def test_arrow_to_pandas_wrong_size_dtypes(
+    arrow_table: Union[pyarrow.Table, pyarrow.RecordBatch], dtypes: Dict
+):
+    with pytest.raises(ValueError, match=f"Number of types {len(dtypes)}"):
+        bigframes.session._io.pandas.arrow_to_pandas(arrow_table, dtypes)
diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py
index 3baff2e1f5..6ceaaf911b 100644
--- a/tests/unit/test_dtypes.py
+++ b/tests/unit/test_dtypes.py
@@ -29,41 +29,42 @@
         # TODO(bmil): Add ARRAY, INTERVAL, STRUCT to cover all the standard
         # BigQuery data types as they appear in Ibis:
         # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
-        (ibis_dtypes.Decimal(precision=76, scale=38, nullable=True), np.dtype("O")),
-        (ibis_dtypes.boolean, pd.BooleanDtype()),
-        (ibis_dtypes.binary, np.dtype("O")),
-        (ibis_dtypes.date, pd.ArrowDtype(pa.date32())),
-        (ibis_dtypes.Timestamp(), pd.ArrowDtype(pa.timestamp("us"))),
-        (ibis_dtypes.float64, pd.Float64Dtype()),
-        (
+        pytest.param(
+            ibis_dtypes.Decimal(precision=76, scale=38, nullable=True),
+            np.dtype("O"),
+            id="bignumeric",
+        ),
+        pytest.param(ibis_dtypes.boolean, pd.BooleanDtype(), id="bool"),
+        pytest.param(ibis_dtypes.binary, np.dtype("O"), id="bytes"),
+        pytest.param(ibis_dtypes.date, pd.ArrowDtype(pa.date32()), id="date"),
+        pytest.param(
+            ibis_dtypes.Timestamp(), pd.ArrowDtype(pa.timestamp("us")), id="datetime"
+        ),
+        pytest.param(ibis_dtypes.float64, pd.Float64Dtype(), id="float"),
+        pytest.param(
             ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True),
             gpd.array.GeometryDtype(),
+            id="geography",
         ),
-        (ibis_dtypes.int64, pd.Int64Dtype()),
-        (ibis_dtypes.json, np.dtype("O")),
-        (ibis_dtypes.Decimal(precision=38, scale=9, nullable=True), np.dtype("O")),
-        (ibis_dtypes.string, pd.StringDtype(storage="pyarrow")),
-        (ibis_dtypes.time, pd.ArrowDtype(pa.time64("us"))),
-        (
+        pytest.param(ibis_dtypes.int8, pd.Int64Dtype(), id="int8-as-int64"),
+        pytest.param(ibis_dtypes.int64, pd.Int64Dtype(), id="int64"),
+        # TODO(tswast): custom dtype (or at least string dtype) for JSON objects
+        pytest.param(ibis_dtypes.json, np.dtype("O"), id="json"),
+        pytest.param(
+            ibis_dtypes.Decimal(precision=38, scale=9, nullable=True),
+            np.dtype("O"),
+            id="numeric",
+        ),
+        pytest.param(
+            ibis_dtypes.string, pd.StringDtype(storage="pyarrow"), id="string"
+        ),
+        pytest.param(ibis_dtypes.time, pd.ArrowDtype(pa.time64("us")), id="time"),
+        pytest.param(
             ibis_dtypes.Timestamp(timezone="UTC"),
             pd.ArrowDtype(pa.timestamp("us", tz="UTC")),  # type: ignore
+            id="timestamp",
         ),
     ],
-    ids=[
-        "bignumeric",
-        "bool",
-        "bytes",
-        "date",
-        "datetime",
-        "float",
-        "geography",
-        "int64",
-        "json",
-        "numeric",
-        "string",
-        "time",
-        "timestamp",
-    ],
 )
 def test_ibis_dtype_converts(ibis_dtype, bigframes_dtype):
     """Test all the Ibis data types needed to read BigQuery tables"""

From ef97071ae1b0115edcbbb915ac6840ae5bd00db3 Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Thu, 26 Oct 2023 17:08:51 +0000
Subject: [PATCH 7/7] fix: address comment

---
 bigframes/ml/ensemble.py                               | 10 ++++++----
 bigframes/ml/forecasting.py                            |  5 +++--
 third_party/bigframes_vendored/sklearn/base.py         | 10 ++++++----
 .../bigframes_vendored/sklearn/cluster/_kmeans.py      |  5 +++--
 .../bigframes_vendored/sklearn/decomposition/_pca.py   |  5 +++--
 5 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py
index 764f00ed12..19ca8608ff 100644
--- a/bigframes/ml/ensemble.py
+++ b/bigframes/ml/ensemble.py
@@ -509,8 +509,9 @@ def score(
 
         .. note::
 
-            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
-            for evaluateing model metrics.
+            Output matches that of the BigQuery ML.EVALUATE function.
+            See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#regression_models
+            for the outputs relevant to this model type.
 
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
@@ -683,8 +684,9 @@ def score(
 
         .. note::
 
-            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
-            for evaluateing model metrics.
+            Output matches that of the BigQuery ML.EVALUATE function.
+            See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#classification_models
+            for the outputs relevant to this model type.
 
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py
index b88518a843..8e309d5e73 100644
--- a/bigframes/ml/forecasting.py
+++ b/bigframes/ml/forecasting.py
@@ -114,8 +114,9 @@ def score(
 
         .. note::
 
-            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
-            for evaluateing model metrics.
+            Output matches that of the BigQuery ML.EVALUATE function.
+            See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#time_series_models
+            for the outputs relevant to this model type.
 
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
diff --git a/third_party/bigframes_vendored/sklearn/base.py b/third_party/bigframes_vendored/sklearn/base.py
index 4d039be60d..768328e552 100644
--- a/third_party/bigframes_vendored/sklearn/base.py
+++ b/third_party/bigframes_vendored/sklearn/base.py
@@ -87,8 +87,9 @@ def score(self, X, y):
 
         .. note::
 
-            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
-            for evaluateing model metrics.
+            Output matches that of the BigQuery ML.EVALUATE function.
+            See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#classification_models
+            for the outputs relevant to this model type.
 
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
@@ -114,8 +115,9 @@ def score(self, X, y):
 
         .. note::
 
-            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
-            for evaluateing model metrics.
+            Output matches that of the BigQuery ML.EVALUATE function.
+            See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#regression_models
+            for the outputs relevant to this model type.
 
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
diff --git a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py
index 7b22bb4560..5369d3662d 100644
--- a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py
+++ b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py
@@ -86,8 +86,9 @@ def score(
 
         .. note::
 
-            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
-            for evaluateing model metrics.
+            Output matches that of the BigQuery ML.EVALUATE function.
+            See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#k-means_models
+            for the outputs relevant to this model type.
 
         Args:
             X (bigframes.dataframe.DataFrame or bigframes.series.Series):
diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py
index 0326a10c2d..011ecc06dd 100644
--- a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py
+++ b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py
@@ -59,8 +59,9 @@ def score(self, X=None, y=None):
 
         .. note::
 
-            We're using BigQuery ML.EVALUATE function (https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate)
-            for evaluateing model metrics.
+            Output matches that of the BigQuery ML.EVALUATE function.
+            See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-evaluate#pca_models
+            for the outputs relevant to this model type.
 
         Args:
             X (default None):