diff --git a/.kokoro/continuous/doctest.cfg b/.kokoro/continuous/doctest.cfg
index 6016700408..2aad95beed 100644
--- a/.kokoro/continuous/doctest.cfg
+++ b/.kokoro/continuous/doctest.cfg
@@ -3,7 +3,7 @@
 # Only run this nox session.
 env_vars: {
     key: "NOX_SESSION"
-    value: "doctest cleanup"
+    value: "cleanup doctest"
 }
 
 env_vars: {
diff --git a/.kokoro/presubmit/doctest.cfg b/.kokoro/presubmit/doctest.cfg
index 6016700408..2aad95beed 100644
--- a/.kokoro/presubmit/doctest.cfg
+++ b/.kokoro/presubmit/doctest.cfg
@@ -3,7 +3,7 @@
 # Only run this nox session.
 env_vars: {
     key: "NOX_SESSION"
-    value: "doctest cleanup"
+    value: "cleanup doctest"
 }
 
 env_vars: {
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 84dd3f36c1..0393ad944c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,31 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [2.6.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.5.0...v2.6.0) (2025-06-09)
+
+
+### Features
+
+* Add blob.transcribe function ([#1773](https://github.com/googleapis/python-bigquery-dataframes/issues/1773)) ([86159a7](https://github.com/googleapis/python-bigquery-dataframes/commit/86159a7d24102574c26764a056478757844e2eca))
+* Implement ai.classify() ([#1781](https://github.com/googleapis/python-bigquery-dataframes/issues/1781)) ([8af26d0](https://github.com/googleapis/python-bigquery-dataframes/commit/8af26d07cf3e8b22e0c69dd0172352fadc1857d8))
+* Implement item() for Series and Index ([#1792](https://github.com/googleapis/python-bigquery-dataframes/issues/1792)) ([d2154c8](https://github.com/googleapis/python-bigquery-dataframes/commit/d2154c82fa0fed6e89c47db747d3c9cd57f9c618))
+* Implement ST_ISCLOSED geography function ([#1789](https://github.com/googleapis/python-bigquery-dataframes/issues/1789)) ([36bc179](https://github.com/googleapis/python-bigquery-dataframes/commit/36bc179ee7ef9b0b6799f98f8fac3f64d91412af))
+* Implement ST_LENGTH geography function ([#1791](https://github.com/googleapis/python-bigquery-dataframes/issues/1791)) ([c5b7fda](https://github.com/googleapis/python-bigquery-dataframes/commit/c5b7fdae74a22e581f7705bc0cf5390e928f4425))
+* Support isin with bigframes.pandas.Index arg ([#1779](https://github.com/googleapis/python-bigquery-dataframes/issues/1779)) ([e480d29](https://github.com/googleapis/python-bigquery-dataframes/commit/e480d29f03636fa9824404ef90c510701e510195))
+
+
+### Bug Fixes
+
+* Address `read_csv` with both `index_col` and `use_cols` behavior inconsistency with pandas ([#1785](https://github.com/googleapis/python-bigquery-dataframes/issues/1785)) ([ba7c313](https://github.com/googleapis/python-bigquery-dataframes/commit/ba7c313c8d308e3ff3f736b60978cb7a51715209))
+* Allow KMeans model init parameter as k-means++ alias ([#1790](https://github.com/googleapis/python-bigquery-dataframes/issues/1790)) ([0b59cf1](https://github.com/googleapis/python-bigquery-dataframes/commit/0b59cf1008613770fa1433c6da395e755c86fe22))
+* Replace function now can handle bpd.NA value. ([#1786](https://github.com/googleapis/python-bigquery-dataframes/issues/1786)) ([7269512](https://github.com/googleapis/python-bigquery-dataframes/commit/7269512a28eb42029447d5380c764353278a74e1))
+
+
+### Documentation
+
+* Adjust strip method examples to match latest pandas ([#1797](https://github.com/googleapis/python-bigquery-dataframes/issues/1797)) ([817b0c0](https://github.com/googleapis/python-bigquery-dataframes/commit/817b0c0c5dc481598fbfdbe40fd925fb38f3a066))
+* Fix docstrings to improve html rendering of code examples ([#1788](https://github.com/googleapis/python-bigquery-dataframes/issues/1788)) ([38d9b73](https://github.com/googleapis/python-bigquery-dataframes/commit/38d9b7376697f8e19124e5d1f5fccda82d920b92))
+
 ## [2.5.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.4.0...v2.5.0) (2025-05-30)
diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index 301207bb31..22bcfb1407 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -32,6 +32,8 @@
     st_difference,
     st_distance,
     st_intersection,
+    st_isclosed,
+    st_length,
 )
 from bigframes.bigquery._operations.json import (
     json_extract,
@@ -58,6 +60,8 @@
     "st_difference",
     "st_distance",
     "st_intersection",
+    "st_isclosed",
+    "st_length",
     # json ops
     "json_extract",
     "json_extract_array",
diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py
index fc9bd1a653..bdc85eed9f 100644
--- a/bigframes/bigquery/_operations/geo.py
+++ b/bigframes/bigquery/_operations/geo.py
@@ -380,3 +380,126 @@ def st_intersection(
             each aligned geometry with other.
     """
     return series._apply_binary_op(other, ops.geo_st_intersection_op)
+
+
+def st_isclosed(
+    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
+) -> bigframes.series.Series:
+    """
+    Returns TRUE for a non-empty Geography, where each element in the
+    Geography has an empty boundary.
+
+    .. note::
+        BigQuery's Geography functions, like `st_isclosed`, interpret the geometry
+        data type as a point set on the Earth's surface. A point set is a set
+        of points, lines, and polygons on the WGS84 reference spheroid, with
+        geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data
+
+    **Examples:**
+
+        >>> import bigframes.geopandas
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+
+        >>> from shapely.geometry import Point, LineString, Polygon
+        >>> bpd.options.display.progress_bar = None
+
+        >>> series = bigframes.geopandas.GeoSeries(
+        ...     [
+        ...         Point(0, 0),  # Point
+        ...         LineString([(0, 0), (1, 1)]),  # Open LineString
+        ...         LineString([(0, 0), (1, 1), (0, 1), (0, 0)]),  # Closed LineString
+        ...         Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]),
+        ...         None,
+        ...     ]
+        ... )
+        >>> series
+        0                            POINT (0 0)
+        1                  LINESTRING (0 0, 1 1)
+        2        LINESTRING (0 0, 1 1, 0 1, 0 0)
+        3         POLYGON ((0 0, 1 1, 0 1, 0 0))
+        4                                   None
+        dtype: geometry
+
+        >>> bbq.st_isclosed(series)
+        0     True
+        1    False
+        2     True
+        3    False
+        4     <NA>
+        dtype: boolean
+
+    Args:
+        series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
+            A series containing geography objects.
+
+    Returns:
+        bigframes.pandas.Series:
+            Series of booleans indicating whether each geometry is closed.
+    """
+    series = series._apply_unary_op(ops.geo_st_isclosed_op)
+    series.name = None
+    return series
+
+
+def st_length(
+    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
+    *,
+    use_spheroid: bool = False,
+) -> bigframes.series.Series:
+    """Returns the total length in meters of the lines in the input GEOGRAPHY.
+
+    If a series element is a point or a polygon, returns zero for that row.
+    If a series element is a collection, returns the length of the lines
+    in the collection; if the collection doesn't contain lines, returns
+    zero.
+
+    The optional use_spheroid parameter determines how this function
+    measures distance. If use_spheroid is FALSE, the function measures
+    distance on the surface of a perfect sphere.
+
+    The use_spheroid parameter currently only supports the value FALSE. The
+    default value of use_spheroid is FALSE. See:
+    https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_length
+
+    **Examples:**
+
+        >>> import bigframes.geopandas
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+
+        >>> from shapely.geometry import Polygon, LineString, Point, GeometryCollection
+        >>> bpd.options.display.progress_bar = None
+
+        >>> series = bigframes.geopandas.GeoSeries(
+        ...     [
+        ...         LineString([(0, 0), (1, 0)]),  # Length will be approx 1 degree in meters
+        ...         Polygon([(0.0, 0.0), (0.1, 0.1), (0.0, 0.1)]),  # Length is 0
+        ...         Point(0, 1),  # Length is 0
+        ...         GeometryCollection([LineString([(0,0),(0,1)]), Point(1,1)])  # Length of LineString only
+        ...     ]
+        ... )
+
+        >>> result = bbq.st_length(series)
+        >>> result
+        0    111195.101177
+        1              0.0
+        2              0.0
+        3    111195.101177
+        dtype: Float64
+
+    Args:
+        series (bigframes.series.Series | bigframes.geopandas.GeoSeries):
+            A series containing geography objects.
+        use_spheroid (bool, optional):
+            Determines how this function measures distance.
+            If FALSE (default), measures distance on a perfect sphere.
+            Currently, only FALSE is supported.
+
+    Returns:
+        bigframes.series.Series:
+            Series of floats representing the lengths in meters.
+    """
+    series = series._apply_unary_op(ops.GeoStLengthOp(use_spheroid=use_spheroid))
+    series.name = None
+    return series
diff --git a/bigframes/blob/_functions.py b/bigframes/blob/_functions.py
index f8fdb21946..51c030a23b 100644
--- a/bigframes/blob/_functions.py
+++ b/bigframes/blob/_functions.py
@@ -95,6 +95,10 @@ def _create_udf(self):
             sql,
             job_config=bigquery.QueryJobConfig(),
             metrics=self._session._metrics,
+            location=None,
+            project=None,
+            timeout=None,
+            query_with_job=True,
         )
 
         return udf_name
diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py
index 20773fd1b4..a6c700a485 100644
--- a/bigframes/core/array_value.py
+++ b/bigframes/core/array_value.py
@@ -34,7 +34,6 @@
 import bigframes.core.ordering as orderings
 import bigframes.core.schema as schemata
 import bigframes.core.tree_properties
-import bigframes.core.utils
 from bigframes.core.window_spec import WindowSpec
 import bigframes.dtypes
 import bigframes.exceptions as bfe
diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py
index fb5399b7cb..451783602d 100644
--- a/bigframes/core/compile/compiler.py
+++ b/bigframes/core/compile/compiler.py
@@ -22,10 +22,9 @@
 import bigframes_vendored.ibis.expr.api as ibis_api
 import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes
 import bigframes_vendored.ibis.expr.types as ibis_types
-import pyarrow as pa
 
 from bigframes import dtypes, operations
-from bigframes.core import expression
+from bigframes.core import expression, pyarrow_utils
 import bigframes.core.compile.compiled as compiled
 import bigframes.core.compile.concat as concat_impl
 import bigframes.core.compile.configs as configs
@@ -172,9 +171,7 @@ def compile_readlocal(node: nodes.ReadLocalNode, *args):
     pa_table = pa_table.rename_columns([item.id.sql for item in node.scan_list.items])
 
     if offsets:
-        pa_table = pa_table.append_column(
-            offsets, pa.array(range(pa_table.num_rows), type=pa.int64())
-        )
+        pa_table = pyarrow_utils.append_offsets(pa_table, offsets)
     return compiled.UnorderedIR.from_polars(pa_table, bq_schema)
diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py
index 14d8e8501c..a0e85d8c69 100644
--- a/bigframes/core/compile/polars/compiler.py
+++ b/bigframes/core/compile/polars/compiler.py
@@ -16,14 +16,17 @@
 import dataclasses
 import functools
 import itertools
-from typing import cast, Optional, Sequence, Tuple, TYPE_CHECKING
+import operator
+from typing import cast, Literal, Optional, Sequence, Tuple, TYPE_CHECKING
+
+import pandas as pd
 
 import bigframes.core
-from bigframes.core import window_spec
+from bigframes.core import identifiers, nodes, ordering, window_spec
 import bigframes.core.expression as ex
 import bigframes.core.guid as guid
-import bigframes.core.nodes as nodes
 import bigframes.core.rewrite
+import bigframes.dtypes
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
@@ -37,6 +40,45 @@
     polars_installed = False
 
 if polars_installed:
+    _DTYPE_MAPPING = {
+        # Direct mappings
+        bigframes.dtypes.INT_DTYPE: pl.Int64(),
+        bigframes.dtypes.FLOAT_DTYPE: pl.Float64(),
+        bigframes.dtypes.BOOL_DTYPE: pl.Boolean(),
+        bigframes.dtypes.STRING_DTYPE: pl.String(),
+        bigframes.dtypes.NUMERIC_DTYPE: pl.Decimal(38, 9),
+        bigframes.dtypes.BIGNUMERIC_DTYPE: pl.Decimal(76, 38),
+        bigframes.dtypes.BYTES_DTYPE: pl.Binary(),
+        bigframes.dtypes.DATE_DTYPE: pl.Date(),
+        bigframes.dtypes.DATETIME_DTYPE: pl.Datetime(time_zone=None),
+        bigframes.dtypes.TIMESTAMP_DTYPE: pl.Datetime(time_zone="UTC"),
+        bigframes.dtypes.TIME_DTYPE: pl.Time(),
+        bigframes.dtypes.TIMEDELTA_DTYPE: pl.Duration(),
+        # Indirect mappings
+        bigframes.dtypes.GEO_DTYPE: pl.String(),
+        bigframes.dtypes.JSON_DTYPE: pl.String(),
+    }
+
+    def _bigframes_dtype_to_polars_dtype(
+        dtype: bigframes.dtypes.ExpressionType,
+    ) -> pl.DataType:
+        if dtype is None:
+            return pl.Null()
+        if bigframes.dtypes.is_struct_like(dtype):
+            return pl.Struct(
+                [
+                    pl.Field(name, _bigframes_dtype_to_polars_dtype(type))
+                    for name, type in bigframes.dtypes.get_struct_fields(dtype).items()
+                ]
+            )
+        if bigframes.dtypes.is_array_like(dtype):
+            return pl.Array(
+                inner=_bigframes_dtype_to_polars_dtype(
+                    bigframes.dtypes.get_array_inner_type(dtype)
+                )
+            )
+        else:
+            return _DTYPE_MAPPING[dtype]
 
     @dataclasses.dataclass(frozen=True)
     class PolarsExpressionCompiler:
@@ -47,33 +89,45 @@ class PolarsExpressionCompiler:
         """
 
         @functools.singledispatchmethod
-        def compile_expression(self, expression: ex.Expression):
+        def compile_expression(self, expression: ex.Expression) -> pl.Expr:
             raise NotImplementedError(f"Cannot compile expression: {expression}")
 
         @compile_expression.register
         def _(
             self,
             expression: ex.ScalarConstantExpression,
-        ):
-            return pl.lit(expression.value)
+        ) -> pl.Expr:
+            value = expression.value
+            if not isinstance(value, float) and pd.isna(value):  # type: ignore
+                value = None
+            if expression.dtype is None:
+                return pl.lit(None)
+            return pl.lit(value, _bigframes_dtype_to_polars_dtype(expression.dtype))
 
         @compile_expression.register
         def _(
             self,
             expression: ex.DerefOp,
-        ):
+        ) -> pl.Expr:
             return pl.col(expression.id.sql)
 
+        @compile_expression.register
+        def _(
+            self,
+            expression: ex.SchemaFieldRefExpression,
+        ) -> pl.Expr:
+            return pl.col(expression.field.id.sql)
+
         @compile_expression.register
         def _(
             self,
             expression: ex.OpExpression,
-        ):
+        ) -> pl.Expr:
             # TODO: Complete the implementation, convert to hash dispatch
             op = expression.op
             args = tuple(map(self.compile_expression, expression.inputs))
             if isinstance(op, ops.invert_op.__class__):
-                return args[0].neg()
+                return ~args[0]
             if isinstance(op, ops.and_op.__class__):
                 return args[0] & args[1]
             if isinstance(op, ops.or_op.__class__):
@@ -82,6 +136,21 @@ def _(
                 return args[0] + args[1]
             if isinstance(op, ops.sub_op.__class__):
                 return args[0] - args[1]
+            if isinstance(op, ops.mul_op.__class__):
+                return args[0] * args[1]
+            if isinstance(op, ops.div_op.__class__):
+                return args[0] / args[1]
+            if isinstance(op, ops.floordiv_op.__class__):
+                # TODO: Handle int // 0
+                return args[0] // args[1]
+            if isinstance(op, (ops.pow_op.__class__, ops.unsafe_pow_op.__class__)):
+                return args[0] ** args[1]
+            if isinstance(op, ops.abs_op.__class__):
+                return args[0].abs()
+            if isinstance(op, ops.neg_op.__class__):
+                return args[0].neg()
+            if isinstance(op, ops.pos_op.__class__):
+                return args[0]
             if isinstance(op, ops.ge_op.__class__):
                 return args[0] >= args[1]
             if isinstance(op, ops.gt_op.__class__):
@@ -91,23 +160,48 @@ def _(
             if isinstance(op, ops.lt_op.__class__):
                 return args[0] < args[1]
             if isinstance(op, ops.eq_op.__class__):
-                return args[0] == args[1]
+                return args[0].eq(args[1])
+            if isinstance(op, ops.eq_null_match_op.__class__):
+                return args[0].eq_missing(args[1])
             if isinstance(op, ops.ne_op.__class__):
-                return args[0] != args[1]
+                return args[0].ne(args[1])
+            if isinstance(op, ops.IsInOp):
+                # TODO: Filter out types that can't be coerced to right type
+                if op.match_nulls or not any(map(pd.isna, op.values)):
+                    # newer polars versions have nulls_equal arg
+                    return args[0].is_in(op.values)
+                else:
+                    return args[0].is_in(op.values) | args[0].is_null()
             if isinstance(op, ops.mod_op.__class__):
                 return args[0] % args[1]
             if isinstance(op, ops.coalesce_op.__class__):
                 return pl.coalesce(*args)
+            if isinstance(op, ops.fillna_op.__class__):
+                return pl.coalesce(*args)
+            if isinstance(op, ops.isnull_op.__class__):
+                return args[0].is_null()
+            if isinstance(op, ops.notnull_op.__class__):
+                return args[0].is_not_null()
             if isinstance(op, ops.CaseWhenOp):
                 expr = pl.when(args[0]).then(args[1])
                 for pred, result in zip(args[2::2], args[3::2]):
-                    return expr.when(pred).then(result)
+                    expr = expr.when(pred).then(result)  # type: ignore
                 return expr
             if isinstance(op, ops.where_op.__class__):
                 original, condition, otherwise = args
                 return pl.when(condition).then(original).otherwise(otherwise)
+            if isinstance(op, ops.AsTypeOp):
+                return self.astype(args[0], op.to_type, safe=op.safe)
+
             raise NotImplementedError(f"Polars compiler hasn't implemented {op}")
 
+        def astype(
+            self, col: pl.Expr, dtype: bigframes.dtypes.Dtype, safe: bool
+        ) -> pl.Expr:
+            # TODO: Polars casting works differently, need to lower instead to specific conversion ops.
+            # eg. We want "True" instead of "true" for bool to string.
+            return col.cast(_DTYPE_MAPPING[dtype], strict=not safe)
+
     @dataclasses.dataclass(frozen=True)
     class PolarsAggregateCompiler:
         scalar_compiler = PolarsExpressionCompiler()
@@ -149,12 +243,26 @@ def compile_agg_expr(self, expr: ex.Aggregation):
 
             return self.compile_agg_op(expr.op, inputs)
 
-        def compile_agg_op(self, op: agg_ops.WindowOp, inputs: Sequence[str] = []):
+        def compile_agg_op(
+            self, op: agg_ops.WindowOp, inputs: Sequence[str] = []
+        ) -> pl.Expr:
             if isinstance(op, agg_ops.ProductOp):
-                # TODO: Need schema to cast back to original type if posisble (eg float back to int)
-                return pl.col(*inputs).log().sum().exp()
+                # TODO: Fix datatype inconsistency with float/int
+                return pl.col(*inputs).product()
             if isinstance(op, agg_ops.SumOp):
                 return pl.sum(*inputs)
+            if isinstance(op, (agg_ops.SizeOp, agg_ops.SizeUnaryOp)):
+                return pl.len()
+            if isinstance(op, agg_ops.MeanOp):
+                return pl.mean(*inputs)
+            if isinstance(op, agg_ops.MedianOp):
+                return pl.median(*inputs)
+            if isinstance(op, agg_ops.AllOp):
+                return pl.all(*inputs)
+            if isinstance(op, agg_ops.AnyOp):
+                return pl.any(*inputs)  # type: ignore
+            if isinstance(op, agg_ops.NuniqueOp):
+                return pl.col(*inputs).drop_nulls().n_unique()
             if isinstance(op, agg_ops.MinOp):
                 return pl.min(*inputs)
             if isinstance(op, agg_ops.MaxOp):
@@ -162,7 +270,35 @@ def compile_agg_op(self, op: agg_ops.WindowOp, inputs: Sequence[str] = []):
             if isinstance(op, agg_ops.CountOp):
                 return pl.count(*inputs)
             if isinstance(op, agg_ops.CorrOp):
-                return pl.corr(*inputs)
+                return pl.corr(
+                    pl.col(inputs[0]).fill_nan(None), pl.col(inputs[1]).fill_nan(None)
+                )
+            if isinstance(op, agg_ops.CovOp):
+                return pl.cov(
+                    pl.col(inputs[0]).fill_nan(None), pl.col(inputs[1]).fill_nan(None)
+                )
+            if isinstance(op, agg_ops.StdOp):
+                return pl.std(inputs[0])
+            if isinstance(op, agg_ops.VarOp):
+                return pl.var(inputs[0])
+            if isinstance(op, agg_ops.PopVarOp):
+                return pl.var(inputs[0], ddof=0)
+            if isinstance(op, agg_ops.FirstNonNullOp):
+                return pl.col(*inputs).drop_nulls().first()
+            if isinstance(op, agg_ops.LastNonNullOp):
+                return pl.col(*inputs).drop_nulls().last()
+            if isinstance(op, agg_ops.FirstOp):
+                return pl.col(*inputs).first()
+            if isinstance(op, agg_ops.LastOp):
+                return pl.col(*inputs).last()
+            if isinstance(op, agg_ops.ShiftOp):
+                return pl.col(*inputs).shift(op.periods)
+            if isinstance(op, agg_ops.DiffOp):
+                return pl.col(*inputs) - pl.col(*inputs).shift(op.periods)
+            if isinstance(op, agg_ops.AnyValueOp):
+                return pl.max(
+                    *inputs
+                )  # probably something faster? maybe just get first item?
             raise NotImplementedError(
                 f"Aggregate op {op} not yet supported in polars engine."
             )
@@ -197,11 +333,14 @@ def compile(self, array_value: bigframes.core.ArrayValue) -> pl.LazyFrame:
 
         # TODO: Create standard way to configure BFET -> BFET rewrites
         # Polars has incomplete slice support in lazy mode
-        node = nodes.bottom_up(array_value.node, bigframes.core.rewrite.rewrite_slice)
+        node = array_value.node
+        node = bigframes.core.rewrite.column_pruning(node)
+        node = nodes.bottom_up(node, bigframes.core.rewrite.rewrite_slice)
+        node = bigframes.core.rewrite.pull_out_window_order(node)
        return self.compile_node(node)
 
     @functools.singledispatchmethod
-    def compile_node(self, node: nodes.BigFrameNode):
+    def compile_node(self, node: nodes.BigFrameNode) -> pl.LazyFrame:
         """Defines transformation but isn't cached, always use compile_node instead"""
         raise ValueError(f"Can't compile unrecognized node: {node}")
@@ -213,7 +352,12 @@ def compile_readlocal(self, node: nodes.ReadLocalNode):
         lazy_frame = cast(
             pl.DataFrame, pl.from_arrow(node.local_data_source.data)
         ).lazy()
-        return lazy_frame.select(cols_to_read.keys()).rename(cols_to_read)
+        lazy_frame = lazy_frame.select(cols_to_read.keys()).rename(cols_to_read)
+        if node.offsets_col:
+            lazy_frame = lazy_frame.with_columns(
+                [pl.int_range(pl.len(), dtype=pl.Int64).alias(node.offsets_col.sql)]
+            )
+        return lazy_frame
 
     @compile_node.register
     def compile_filter(self, node: nodes.FilterNode):
@@ -227,17 +371,18 @@ def compile_orderby(self, node: nodes.OrderByNode):
         if len(node.by) == 0:  # pragma: no cover
             return frame
-
-        frame = frame.sort(
-            [
-                self.expr_compiler.compile_expression(by.scalar_expression)
-                for by in node.by
-            ],
-            descending=[not by.direction.is_ascending for by in node.by],
-            nulls_last=[by.na_last for by in node.by],
+        return self._sort(frame, node.by)
+
+    def _sort(
+        self, frame: pl.LazyFrame, by: Sequence[ordering.OrderingExpression]
+    ) -> pl.LazyFrame:
+        sorted = frame.sort(
+            [self.expr_compiler.compile_expression(by.scalar_expression) for by in by],
+            descending=[not by.direction.is_ascending for by in by],
+            nulls_last=[by.na_last for by in by],
             maintain_order=True,
         )
-        return frame
+        return sorted
 
     @compile_node.register
     def compile_reversed(self, node: nodes.ReversedNode):
@@ -251,10 +396,15 @@ def compile_selection(self, node: nodes.SelectionNode):
 
     @compile_node.register
     def compile_projection(self, node: nodes.ProjectionNode):
-        new_cols = [
-            self.expr_compiler.compile_expression(ex).alias(name.sql)
-            for ex, name in node.assignments
-        ]
+        new_cols = []
+        for proj_expr, name in node.assignments:
+            bound_expr = ex.bind_schema_fields(proj_expr, node.child.field_by_id)
+            new_col = self.expr_compiler.compile_expression(bound_expr).alias(name.sql)
+            if bound_expr.output_type is None:
+                new_col = new_col.cast(
+                    _bigframes_dtype_to_polars_dtype(bigframes.dtypes.DEFAULT_DTYPE)
+                )
+            new_cols.append(new_col)
         return self.compile_node(node.child).with_columns(new_cols)
 
     @compile_node.register
@@ -265,37 +415,91 @@ def compile_offsets(self, node: nodes.PromoteOffsetsNode):
 
     @compile_node.register
     def compile_join(self, node: nodes.JoinNode):
-        # Always totally order this, as adding offsets is relatively cheap for in-memory columnar data
-        left = self.compile_node(node.left_child).with_columns(
+        left = self.compile_node(node.left_child)
+        right = self.compile_node(node.right_child)
+        left_on = [l_name.id.sql for l_name, _ in node.conditions]
+        right_on = [r_name.id.sql for _, r_name in node.conditions]
+        if node.type == "right":
+            return self._ordered_join(
+                right, left, "left", right_on, left_on, node.joins_nulls
+            ).select([id.sql for id in node.ids])
+        return self._ordered_join(
+            left, right, node.type, left_on, right_on, node.joins_nulls
+        )
+
+    def _ordered_join(
+        self,
+        left_frame: pl.LazyFrame,
+        right_frame: pl.LazyFrame,
+        how: Literal["inner", "outer", "left", "cross"],
+        left_on: Sequence[str],
+        right_on: Sequence[str],
+        join_nulls: bool,
+    ):
+        if how == "right":
+            # seems to cause seg faults as of v1.30 for no apparent reason
+            raise ValueError("right join not supported")
+        left = left_frame.with_columns(
             [
                 pl.int_range(pl.len()).alias("_bf_join_l"),
             ]
         )
-        right = self.compile_node(node.right_child).with_columns(
+        right = right_frame.with_columns(
            [
                 pl.int_range(pl.len()).alias("_bf_join_r"),
             ]
         )
-        if node.type != "cross":
-            left_on = [l_name.id.sql for l_name, _ in node.conditions]
-            right_on = [r_name.id.sql for _, r_name in node.conditions]
+        if how != "cross":
             joined = left.join(
-                right, how=node.type, left_on=left_on, right_on=right_on, coalesce=False
+                right,
+                how=how,
+                left_on=left_on,
+                right_on=right_on,
+                # Note: join_nulls renamed to nulls_equal for polars 1.24
+                join_nulls=join_nulls,  # type: ignore
+                coalesce=False,
             )
         else:
-            joined = left.join(right, how=node.type)
-        return joined.sort(["_bf_join_l", "_bf_join_r"]).drop(
+            joined = left.join(right, how=how, coalesce=False)
+
+        join_order = (
+            ["_bf_join_l", "_bf_join_r"]
+            if how != "right"
+            else ["_bf_join_r", "_bf_join_l"]
+        )
+        return joined.sort(join_order, nulls_last=True).drop(
             ["_bf_join_l", "_bf_join_r"]
         )
 
     @compile_node.register
     def compile_concat(self, node: nodes.ConcatNode):
-        return pl.concat(self.compile_node(child) for child in node.child_nodes)
+        child_frames = [self.compile_node(child) for child in node.child_nodes]
+        child_frames = [
+            frame.rename(
+                {col: id.sql for col, id in zip(frame.columns, node.output_ids)}
+            )
+            for frame in child_frames
+        ]
+        df = pl.concat(child_frames)
+        return df
 
     @compile_node.register
     def compile_agg(self, node: nodes.AggregateNode):
         df = self.compile_node(node.child)
-
+        if node.dropna and len(node.by_column_ids) > 0:
+            df = df.filter(
+                [pl.col(ref.id.sql).is_not_null() for ref in node.by_column_ids]
+            )
+        if node.order_by:
+            df = self._sort(df, node.order_by)
+        return self._aggregate(df, node.aggregations, node.by_column_ids)
+
+    def _aggregate(
+        self,
+        df: pl.LazyFrame,
+        aggregations: Sequence[Tuple[ex.Aggregation, identifiers.ColumnId]],
+        grouping_keys: Tuple[ex.DerefOp, ...],
+    ) -> pl.LazyFrame:
         # Need to materialize columns to broadcast constants
         agg_inputs = [
             list(
@@ -304,7 +508,7 @@ def compile_agg(self, node: nodes.AggregateNode):
                     self.agg_compiler.get_args(agg),
                 )
             )
-            for agg, _ in node.aggregations
+            for agg, _ in aggregations
         ]
 
         df_agg_inputs = df.with_columns(itertools.chain(*agg_inputs))
@@ -313,18 +517,19 @@ def compile_agg(self, node: nodes.AggregateNode):
                 self.agg_compiler.compile_agg_op(
                     agg.op, list(map(lambda x: x.meta.output_name(), inputs))
                 ).alias(id.sql)
-                for (agg, id), inputs in zip(node.aggregations, agg_inputs)
+                for (agg, id), inputs in zip(aggregations, agg_inputs)
             ]
 
-        if len(node.by_column_ids) > 0:
-            group_exprs = [pl.col(ref.id.sql) for ref in node.by_column_ids]
+        if len(grouping_keys) > 0:
+            group_exprs = [pl.col(ref.id.sql) for ref in grouping_keys]
             grouped_df = df_agg_inputs.group_by(group_exprs)
-            return grouped_df.agg(agg_exprs).sort(group_exprs)
+            return grouped_df.agg(agg_exprs).sort(group_exprs, nulls_last=True)
         else:
             return df_agg_inputs.select(agg_exprs)
 
     @compile_node.register
     def compile_explode(self, node: nodes.ExplodeNode):
+        assert node.offsets_col is None
         df = self.compile_node(node.child)
         cols = [pl.col(col.id.sql) for col in node.column_ids]
         return df.explode(cols)
@@ -338,55 +543,92 @@ def compile_sample(self, node: nodes.RandomSampleNode):
     @compile_node.register
     def compile_window(self, node: nodes.WindowOpNode):
         df = self.compile_node(node.child)
-        agg_expr = self.agg_compiler.compile_agg_expr(node.expression).alias(
-            node.output_name.sql
-        )
-        # Three window types: completely unbound, grouped and row bounded
         window = node.window_spec
-
+        # Should have been handled by rewriter
+        assert len(window.ordering) == 0
         if window.min_periods > 0:
             raise NotImplementedError("min_period not yet supported for polars engine")
 
-        if window.bounds is None:
+        if (window.bounds is None) or (window.is_unbounded):
             # polars will automatically broadcast the aggregate to the matching input rows
-            if len(window.grouping_keys) == 0:  # unbound window
-                pass
-            else:  # partition-only window
-                agg_expr = agg_expr.over(
-                    partition_by=[ref.id.sql for ref in window.grouping_keys]
-                )
-            return df.with_columns([agg_expr])
-
+            agg_pl = self.agg_compiler.compile_agg_expr(node.expression)
+            if window.grouping_keys:
+                agg_pl = agg_pl.over(id.id.sql for id in window.grouping_keys)
+            result = df.with_columns(agg_pl.alias(node.output_name.sql))
         else:  # row-bounded window
-            assert isinstance(window.bounds, window_spec.RowsWindowBounds)
-            # Polars API semi-bounded, and any grouped rolling window challenging
-            # https://github.com/pola-rs/polars/issues/4799
-            # https://github.com/pola-rs/polars/issues/8976
-            index_col_name = "_bf_pl_engine_offsets"
-            indexed_df = df.with_row_index(index_col_name)
-            if len(window.grouping_keys) == 0:  # rolling-only window
-                # https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.rolling.html
-                offset_n = window.bounds.start
-                period_n = _get_period(window.bounds) or df.collect().height
-                results = indexed_df.rolling(
-                    index_column=index_col_name,
-                    period=f"{period_n}i",
-                    offset=f"{offset_n}i" if offset_n else None,
-                ).agg(agg_expr)
-            else:  # groupby-rolling window
-                raise NotImplementedError(
-                    "Groupby rolling windows not yet implemented in polars engine"
-                )
-            # polars is columnar, so this is efficient
-            # TODO: why can't just add columns?
-            return pl.concat([df, results], how="horizontal")
+            window_result = self._calc_row_analytic_func(
+                df, node.expression, node.window_spec, node.output_name.sql
+            )
+            result = pl.concat([df, window_result], how="horizontal")
+
+        # Probably easier just to pull this out as a rewriter
+        if (
+            node.expression.op.skips_nulls
+            and not node.never_skip_nulls
+            and node.expression.column_references
+        ):
+            nullity_expr = functools.reduce(
+                operator.or_,
+                (
+                    pl.col(column.sql).is_null()
+                    for column in node.expression.column_references
+                ),
+            )
+            result = result.with_columns(
+                pl.when(nullity_expr)
+                .then(None)
+                .otherwise(pl.col(node.output_name.sql))
+                .alias(node.output_name.sql)
+            )
+        return result
+
+    def _calc_row_analytic_func(
+        self,
+        frame: pl.LazyFrame,
+        agg_expr: ex.Aggregation,
+        window: window_spec.WindowSpec,
+        name: str,
+    ) -> pl.LazyFrame:
+        if not isinstance(window.bounds, window_spec.RowsWindowBounds):
+            raise NotImplementedError("Only row bounds supported by polars engine")
+        groupby = None
+        if len(window.grouping_keys) > 0:
+            groupby = [
+                self.expr_compiler.compile_expression(ref)
+                for ref in window.grouping_keys
+            ]
+
+        # Polars API semi-bounded, and any grouped rolling window challenging
+        # https://github.com/pola-rs/polars/issues/4799
+        # https://github.com/pola-rs/polars/issues/8976
+        pl_agg_expr = self.agg_compiler.compile_agg_expr(agg_expr).alias(name)
+        index_col_name = "_bf_pl_engine_offsets"
+        indexed_df = frame.with_row_index(index_col_name)
+        # https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.rolling.html
+        period_n, offset_n = _get_period_and_offset(window.bounds)
+        return (
+            indexed_df.rolling(
+                index_column=index_col_name,
+                period=f"{period_n}i",
+                offset=f"{offset_n}i" if (offset_n is not None) else None,
+                group_by=groupby,
+            )
+            .agg(pl_agg_expr)
+            .select(name)
+        )
 
 
-def _get_period(bounds: window_spec.RowsWindowBounds) -> Optional[int]:
-    """Returns None if the boundary is infinite."""
-    if bounds.start is None or bounds.end is None:
-        return None
+def _get_period_and_offset(
+    bounds: window_spec.RowsWindowBounds,
+) -> tuple[int, Optional[int]]:
+    # fixed size window
+    if (bounds.start is not None) and (bounds.end is not None):
+        return ((bounds.end - bounds.start + 1), bounds.start - 1)
 
-    # collecting height is a massive kludge
-    return bounds.end - bounds.start + 1
+    LARGE_N = 1000000000
+    if bounds.start is not None:
+        return (LARGE_N, bounds.start - 1)
+    if bounds.end is not None:
+        return (LARGE_N, None)
+    raise ValueError("Not a bounded window")
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index 7707f16dad..a1fc995159 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -30,7 +30,6 @@
 import bigframes.core.compile.default_ordering
 import bigframes.core.compile.ibis_types
 import bigframes.core.expression as ex
-import bigframes.dtypes
 import bigframes.operations as ops
 
 _ZERO = typing.cast(ibis_types.NumericValue, ibis_types.literal(0))
@@ -1074,11 +1073,22 @@ def geo_st_intersection_op_impl(x: ibis_types.Value, y: ibis_types.Value):
     )
 
 
+@scalar_op_compiler.register_unary_op(ops.geo_st_isclosed_op, pass_op=False)
+def geo_st_isclosed_op_impl(x: ibis_types.Value):
+    return st_isclosed(x)
+
+
 @scalar_op_compiler.register_unary_op(ops.geo_x_op)
 def geo_x_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.GeoSpatialValue, x).x()
 
 
+@scalar_op_compiler.register_unary_op(ops.GeoStLengthOp, pass_op=True)
+def geo_length_op_impl(x: ibis_types.Value, op: ops.GeoStLengthOp):
+    # Call the st_length UDF defined in this file (or imported)
+    return st_length(x, op.use_spheroid)
+
+
 @scalar_op_compiler.register_unary_op(ops.geo_y_op)
 def geo_y_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.GeoSpatialValue, x).y()
@@ -2057,6 +2067,12 @@ def st_distance(a: ibis_dtypes.geography, b: ibis_dtypes.geography, use_spheroid
     """Convert string to geography."""
 
 
+@ibis_udf.scalar.builtin
+def st_length(geog: ibis_dtypes.geography, use_spheroid: bool) -> ibis_dtypes.float:  # type: ignore
+    """ST_LENGTH BQ builtin. This body is never executed."""
+    pass
+
+
 @ibis_udf.scalar.builtin
 def unix_micros(a: ibis_dtypes.timestamp) -> int:  # type: ignore
     """Convert a timestamp to microseconds"""
@@ -2180,6 +2196,11 @@ def str_lstrip_op(  # type: ignore[empty-body]
     """Remove leading and trailing characters."""
 
 
+@ibis_udf.scalar.builtin
+def st_isclosed(a: ibis_dtypes.geography) -> ibis_dtypes.boolean:  # type: ignore
+    """Checks if a geography is closed."""
+
+
 @ibis_udf.scalar.builtin(name="rtrim")
 def str_rstrip_op(  # type: ignore[empty-body]
     x: ibis_dtypes.String, to_strip: ibis_dtypes.String
diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py
index 1cb270297c..50169d1a8b 100644
--- a/bigframes/core/compile/sqlglot/compiler.py
+++ b/bigframes/core/compile/sqlglot/compiler.py
@@ -18,10 +18,9 @@
 import typing
 
 from google.cloud import bigquery
-import pyarrow as pa
 import sqlglot.expressions as sge
 
-from bigframes.core import expression, guid, identifiers, nodes, rewrite
+from bigframes.core import expression, guid, identifiers, nodes, pyarrow_utils, rewrite
 from bigframes.core.compile import configs
 import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
 import bigframes.core.compile.sqlglot.sqlglot_ir as ir
@@ -155,9 +154,7 @@ def compile_readlocal(self, node: nodes.ReadLocalNode, *args) -> ir.SQLGlotIR:
         offsets = node.offsets_col.sql if node.offsets_col else None
         if offsets:
-            pa_table = pa_table.append_column(
-                offsets, pa.array(range(pa_table.num_rows), type=pa.int64())
-            )
+            pa_table = pyarrow_utils.append_offsets(pa_table, offsets)
 
         return ir.SQLGlotIR.from_pyarrow(pa_table, node.schema, uid_gen=self.uid_gen)
diff --git a/bigframes/core/global_session.py b/bigframes/core/global_session.py
index d4d70f5a06..8732b55990 100644
--- a/bigframes/core/global_session.py
+++ b/bigframes/core/global_session.py
@@ -112,3 +112,23 @@ def get_global_session():
 
 def with_default_session(func: Callable[..., _T], *args, **kwargs) -> _T:
     return func(get_global_session(), *args, **kwargs)
+
+
+class _GlobalSessionContext:
+    """
+    Context manager for testing that sets global session.
+    """
+
+    def __init__(self, session: bigframes.session.Session):
+        self._session = session
+
+    def __enter__(self):
+        global _global_session, _global_session_lock
+        with _global_session_lock:
+            self._previous_session = _global_session
+            _global_session = self._session
+
+    def __exit__(self, *exc_details):
+        global _global_session, _global_session_lock
+        with _global_session_lock:
+            _global_session = self._previous_session
diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py
index 44b1d9d4fa..836d84b46a 100644
--- a/bigframes/core/indexes/base.py
+++ b/bigframes/core/indexes/base.py
@@ -504,6 +504,10 @@ def unique(self, level: Hashable | int | None = None) -> Index:
         return self.get_level_values(level).drop_duplicates()
 
     def isin(self, values) -> Index:
+        import bigframes.series as series
+
+        if isinstance(values, (series.Series, Index)):
+            return Index(self.to_series().isin(values))
         if not utils.is_list_like(values):
             raise TypeError(
                 "only list-like objects are allowed to be passed to "
@@ -614,6 +618,10 @@ def to_numpy(self, dtype=None, *, allow_large_results=None, **kwargs) -> np.ndarray:
     def __len__(self):
         return self.shape[0]
 
+    def item(self):
+        # Docstring is in third_party/bigframes_vendored/pandas/core/indexes/base.py
+        return self.to_series().peek(2).item()
+
 
 def _should_create_datetime_index(block: blocks.Block) -> bool:
     if len(block.index.dtypes) != 1:
diff --git a/bigframes/core/local_data.py b/bigframes/core/local_data.py
index 2e8c4aff44..da1c174bc4 100644
--- a/bigframes/core/local_data.py
+++ b/bigframes/core/local_data.py
@@ -265,7 +265,13 @@ def _adapt_pandas_series(
 ) -> tuple[Union[pa.ChunkedArray, pa.Array], bigframes.dtypes.Dtype]:
     # Mostly rely on pyarrow conversions, but have to convert geo without its help.
     if series.dtype == bigframes.dtypes.GEO_DTYPE:
-        series = geopandas.GeoSeries(series).to_wkt(rounding_precision=-1)
+        # geoseries produces eg "POINT (1 1)", while bq uses style "POINT(1 1)"
+        # we normalize to bq style for consistency
+        series = (
+            geopandas.GeoSeries(series)
+            .to_wkt(rounding_precision=-1)
+            .str.replace(r"(\w+) \(", repl=r"\1(", regex=True)
+        )
         return pa.array(series, type=pa.string()), bigframes.dtypes.GEO_DTYPE
     try:
         return _adapt_arrow_array(pa.array(series))
@@ -295,7 +301,7 @@ def _adapt_chunked_array(
 
 
 def _adapt_arrow_array(array: pa.Array) -> tuple[pa.Array, bigframes.dtypes.Dtype]:
-    """Normalize the array to managed storage types. Preverse shapes, only transforms values."""
+    """Normalize the array to managed storage types. Preserve shapes, only transforms values."""
     if array.offset != 0:  # Offset arrays don't have all operations implemented
         return _adapt_arrow_array(pa.concat_arrays([array]))
@@ -326,7 +332,7 @@ def _adapt_arrow_array(array: pa.Array) -> tuple[pa.Array, bigframes.dtypes.Dtype]:
         return new_value.fill_null([]), bigframes.dtypes.list_type(values_type)
     if array.type == bigframes.dtypes.JSON_ARROW_TYPE:
         return _canonicalize_json(array), bigframes.dtypes.JSON_DTYPE
-    target_type = _logical_type_replacements(array.type)
+    target_type = logical_type_replacements(array.type)
     if target_type != array.type:
         # TODO: Maybe warn if lossy conversion?
         array = array.cast(target_type)
@@ -372,6 +378,10 @@ def recursive_f(type: pa.DataType) -> pa.DataType:
         if new_field_t != type.value_type:
             return pa.list_(new_field_t)
         return type
+    # polars can produce large lists, and we want to map these down to regular lists
+    if pa.types.is_large_list(type):
+        new_field_t = recursive_f(type.value_type)
+        return pa.list_(new_field_t)
     if pa.types.is_struct(type):
         struct_type = cast(pa.StructType, type)
         new_fields: list[pa.Field] = []
@@ -385,7 +395,7 @@ def recursive_f(type: pa.DataType) -> pa.DataType:
 
 
 @_recursive_map_types
-def _logical_type_replacements(type: pa.DataType) -> pa.DataType:
+def logical_type_replacements(type: pa.DataType) -> pa.DataType:
     if pa.types.is_timestamp(type):
         # This is potentially lossy, but BigFrames doesn't support ns
         new_tz = "UTC" if (type.tz is not None) else None
@@ -403,8 +413,11 @@ def _logical_type_replacements(type: pa.DataType) -> pa.DataType:
     if pa.types.is_large_string(type):
         # simple string type can handle the largest strings needed
         return pa.string()
+    if pa.types.is_large_binary(type):
+        # simple binary type can handle the largest values needed
+        return pa.binary()
     if pa.types.is_dictionary(type):
-        return _logical_type_replacements(type.value_type)
+        return logical_type_replacements(type.value_type)
     if pa.types.is_null(type):
         # null as a type not allowed, default type is float64 for bigframes
         return pa.float64()
diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py
index cc82c844f7..9dcd74182b 100644
--- a/bigframes/core/nodes.py
+++ b/bigframes/core/nodes.py
@@ -154,6 +154,16 @@ def is_limit(self) -> bool:
             and (self.stop > 0)
         )
 
+    @property
+    def is_noop(self) -> bool:
+        """Returns whether this node doesn't actually change the results."""
+        # TODO: Handle tail case.
+        return (
+            ((not self.start) or (self.start == 0))
+            and (self.step == 1)
+            and ((self.stop is None) or (self.stop == self.row_count))
+        )
+
     @property
     def row_count(self) -> typing.Optional[int]:
         child_length = self.child.row_count
@@ -591,6 +601,10 @@ class ScanList:
 
     items: typing.Tuple[ScanItem, ...]
 
+    @classmethod
+    def from_items(cls, items: Iterable[ScanItem]) -> ScanList:
+        return cls(tuple(items))
+
     def filter_cols(
         self,
         ids: AbstractSet[identifiers.ColumnId],
diff --git a/bigframes/core/pyarrow_utils.py b/bigframes/core/pyarrow_utils.py
index eead30d908..4196e68304 100644
--- a/bigframes/core/pyarrow_utils.py
+++ b/bigframes/core/pyarrow_utils.py
@@ -85,3 +85,18 @@ def truncate_pyarrow_iterable(
         else:
             yield batch
             total_yielded += batch.num_rows
+
+
+def append_offsets(
+    pa_table: pa.Table,
+    offsets_col: str,
+) -> pa.Table:
+    return pa_table.append_column(
+        offsets_col, pa.array(range(pa_table.num_rows), type=pa.int64())
+    )
+
+
+def as_nullable(pa_table: pa.Table):
+    """Normalizes schema to nullable for value-wise comparisons."""
+    nullable_schema = pa.schema(field.with_nullable(True) for field in pa_table.schema)
+    return pa_table.cast(nullable_schema)
diff --git a/bigframes/core/rewrite/__init__.py b/bigframes/core/rewrite/__init__.py
index b8f1d26db8..5d554d45d7 100644
--- a/bigframes/core/rewrite/__init__.py
+++ b/bigframes/core/rewrite/__init__.py
@@ -24,7 +24,7 @@
 )
 from bigframes.core.rewrite.slices import pull_out_limit, pull_up_limits, rewrite_slice
 from bigframes.core.rewrite.timedeltas import rewrite_timedelta_expressions
-from bigframes.core.rewrite.windows import rewrite_range_rolling
+from bigframes.core.rewrite.windows import pull_out_window_order, rewrite_range_rolling
 
 __all__ = [
     "legacy_join_as_projection",
@@ -41,4 +41,5 @@
     "bake_order",
     "try_reduce_to_local_scan",
     "fold_row_counts",
+    "pull_out_window_order",
 ]
diff --git a/bigframes/core/rewrite/scan_reduction.py b/bigframes/core/rewrite/scan_reduction.py
index b9050c0c34..b0729337e7 100644
--- a/bigframes/core/rewrite/scan_reduction.py
+++ b/bigframes/core/rewrite/scan_reduction.py
@@ -16,6 +16,7 @@
 from typing import Optional
 
 from bigframes.core import nodes
+import bigframes.core.rewrite.slices
 
 
 def try_reduce_to_table_scan(root: nodes.BigFrameNode) -> Optional[nodes.ReadTableNode]:
@@ -28,7 +29,15 @@ def try_reduce_to_table_scan(root: nodes.BigFrameNode) -> Optional[nodes.ReadTableNode]:
     return None
 
 
-def try_reduce_to_local_scan(node: nodes.BigFrameNode) -> Optional[nodes.ReadLocalNode]:
+def try_reduce_to_local_scan(
+    node: nodes.BigFrameNode,
+) -> Optional[tuple[nodes.ReadLocalNode, Optional[int]]]:
+    """Create a ReadLocalNode with optional limit, if possible.
+
+    Similar to ReadApiSemiExecutor._try_adapt_plan.
+    """
+    node, limit = bigframes.core.rewrite.slices.pull_out_limit(node)
+
     if not all(
         map(
             lambda x: isinstance(x, (nodes.ReadLocalNode, nodes.SelectionNode)),
@@ -38,7 +47,7 @@ def try_reduce_to_local_scan(node: nodes.BigFrameNode) -> Optional[nodes.ReadLocalNode]:
         return None
     result = node.bottom_up(merge_scan)
     if isinstance(result, nodes.ReadLocalNode):
-        return result
+        return result, limit
     return None
diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index 92911310da..bed3a8a3f3 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -57,6 +57,9 @@ def pull_out_limit(
         if (prior_limit is not None) and (prior_limit < limit):
             limit = prior_limit
         return new_root, limit
+    if root.is_noop:
+        new_root, prior_limit = pull_out_limit(root.child)
+        return new_root, prior_limit
     elif (
         isinstance(root, (nodes.SelectionNode, nodes.ProjectionNode))
         and root.row_preserving
diff --git a/bigframes/core/rewrite/windows.py b/bigframes/core/rewrite/windows.py
index 9f55db23af..6e9ba0dd3d 100644
--- a/bigframes/core/rewrite/windows.py
+++ b/bigframes/core/rewrite/windows.py
@@ -17,7 +17,7 @@
 import dataclasses
 
 from bigframes import operations as ops
-from bigframes.core import nodes
+from bigframes.core import guid, identifiers, nodes, ordering
 
 
 def rewrite_range_rolling(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
@@ -43,3 +43,34 @@ def rewrite_range_rolling(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
         node,
         window_spec=dataclasses.replace(node.window_spec, ordering=(new_ordering,)),
     )
+
+
+def pull_out_window_order(root: nodes.BigFrameNode) -> nodes.BigFrameNode:
+    return root.bottom_up(rewrite_window_node)
+
+
+def rewrite_window_node(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
+    if not isinstance(node, nodes.WindowOpNode):
+        return node
+    if len(node.window_spec.ordering) == 0:
+        return node
+    else:
+        offsets_id = guid.generate_guid()
+        w_offsets = nodes.PromoteOffsetsNode(
+            node.child, identifiers.ColumnId(offsets_id)
+        )
+        sorted_child = nodes.OrderByNode(w_offsets, node.window_spec.ordering)
+        new_window_node = dataclasses.replace(
+            node,
+            child=sorted_child,
+            window_spec=node.window_spec.without_order(force=True),
+        )
+        w_resetted_order = nodes.OrderByNode(
+            new_window_node,
+            by=(ordering.ascending_over(identifiers.ColumnId(offsets_id)),),
+            is_total_order=True,
+        )
+        w_offsets_dropped = nodes.SelectionNode(
+            w_resetted_order, tuple(nodes.AliasedRef.identity(id) for id in node.ids)
+        )
+        return w_offsets_dropped
diff --git a/bigframes/core/schema.py b/bigframes/core/schema.py
index 4f636ab210..b1a77d1259 100644
--- a/bigframes/core/schema.py
+++ b/bigframes/core/schema.py
@@ -17,7 +17,7 @@
 from dataclasses import dataclass
 import functools
 import typing
-from typing import Sequence
+from typing import Dict, List, Sequence
 
 import google.cloud.bigquery
 import pyarrow
@@ -47,14 +47,24 @@ def from_bq_table(
         column_type_overrides: typing.Optional[
             typing.Dict[str, bigframes.dtypes.Dtype]
         ] = None,
+    ):
+        return ArraySchema.from_bq_schema(
+            table.schema, column_type_overrides=column_type_overrides
+        )
+
+    @classmethod
+    def from_bq_schema(
+        cls,
+        schema: List[google.cloud.bigquery.SchemaField],
+        column_type_overrides: typing.Optional[
+            Dict[str, bigframes.dtypes.Dtype]
+        ] = None,
     ):
         if column_type_overrides is None:
             column_type_overrides = {}
         items = tuple(
             SchemaItem(name, column_type_overrides.get(name, dtype))
-            for name, dtype in bigframes.dtypes.bf_type_from_type_kind(
-                table.schema
-            ).items()
+            for name, dtype in bigframes.dtypes.bf_type_from_type_kind(schema).items()
         )
         return ArraySchema(items)
diff --git a/bigframes/core/window_spec.py b/bigframes/core/window_spec.py
index d08ba3d12a..2be30135ee 100644
--- a/bigframes/core/window_spec.py
+++ b/bigframes/core/window_spec.py
@@ -234,7 +234,9 @@ def is_row_bounded(self):
         This is relevant for determining whether the window requires a total order
         to calculate deterministically.
         """
-        return isinstance(self.bounds, RowsWindowBounds)
+        return isinstance(self.bounds, RowsWindowBounds) and (
+            (self.bounds.start is not None) or (self.bounds.end is not None)
+        )
 
     @property
     def is_range_bounded(self):
@@ -254,7 +256,9 @@ def is_unbounded(self):
         This is relevant for determining whether the window requires a total order
         to calculate deterministically.
         """
-        return self.bounds is None
+        return self.bounds is None or (
+            self.bounds.start is None and self.bounds.end is None
+        )
 
     @property
     def all_referenced_columns(self) -> Set[ids.ColumnId]:
@@ -266,9 +270,9 @@ def all_referenced_columns(self) -> Set[ids.ColumnId]:
         )
         return set(itertools.chain((i.id for i in self.grouping_keys), ordering_vars))
 
-    def without_order(self) -> WindowSpec:
+    def without_order(self, force: bool = False) -> WindowSpec:
         """Removes ordering clause if ordering isn't required to define bounds."""
-        if self.is_row_bounded:
+        if self.is_row_bounded and not force:
             raise ValueError("Cannot remove order from row-bounded window")
         return replace(self, ordering=())
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 262fa9dde7..2c5df89665 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -754,7 +754,7 @@ def bf_type_from_type_kind(
 
 def is_dtype(scalar: typing.Any, dtype: Dtype) -> bool:
     """Captures whether a scalar can be losslessly represented by a dtype."""
-    if scalar is None:
+    if pd.isna(scalar):
         return True
     if pd.api.types.is_bool_dtype(dtype):
         return pd.api.types.is_bool(scalar)
diff --git a/bigframes/functions/_function_client.py b/bigframes/functions/_function_client.py
index 0cc3d52c38..d03021dd23 100644
--- a/bigframes/functions/_function_client.py
+++ b/bigframes/functions/_function_client.py
@@ -125,11 +125,15 @@ def _ensure_dataset_exists(self) -> None:
     def _create_bq_function(self, create_function_ddl: str) -> None:
         # TODO(swast): plumb through the original, user-facing api_name.
         _, query_job = bigframes.session._io.bigquery.start_query_with_client(
-            self._session.bqclient,
+            cast(bigquery.Client, self._session.bqclient),
             create_function_ddl,
             job_config=bigquery.QueryJobConfig(),
+            location=None,
+            project=None,
+            timeout=None,
+            metrics=None,
+            query_with_job=True,
         )
-        assert query_job is not None
         logger.info(f"Created bigframes function {query_job.ddl_target_routine}")
 
     def _format_function_options(self, function_options: dict) -> str:
diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py
index 38ebda7d92..2999625cda 100644
--- a/bigframes/geopandas/geoseries.py
+++ b/bigframes/geopandas/geoseries.py
@@ -30,6 +30,12 @@ def __init__(self, data=None, index=None, **kwargs):
             data=data, index=index, dtype=geopandas.array.GeometryDtype(), **kwargs
         )
 
+    @property
+    def length(self):
+        raise NotImplementedError(
+            "GeoSeries.length is not yet implemented. Please use bigframes.bigquery.st_length(geoseries) instead."
+ ) + @property def x(self) -> bigframes.series.Series: series = self._apply_unary_op(ops.geo_x_op) @@ -57,6 +63,15 @@ def boundary(self) -> bigframes.series.Series: # type: ignore series.name = None return series + @property + def is_closed(self) -> bigframes.series.Series: + # TODO(tswast): GeoPandas doesn't treat Point as closed. Use ST_LENGTH + # when available to filter out "closed" shapes that return false in + # GeoPandas. + raise NotImplementedError( + f"GeoSeries.is_closed is not supported. Use bigframes.bigquery.st_isclosed(series), instead. {constants.FEEDBACK_LINK}" + ) + @classmethod def from_wkt(cls, data, index=None) -> GeoSeries: series = bigframes.series.Series(data, index=index) diff --git a/bigframes/ml/cluster.py b/bigframes/ml/cluster.py index a03dc937dc..cd27357680 100644 --- a/bigframes/ml/cluster.py +++ b/bigframes/ml/cluster.py @@ -59,7 +59,8 @@ def __init__( warm_start: bool = False, ): self.n_clusters = n_clusters - self.init = init + # allow the alias to be compatible with sklean + self.init = "kmeans++" if init == "k-means++" else init self.init_col = init_col self.distance_type = distance_type self.max_iter = max_iter diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 3e97ec6f4a..faf4e18d5e 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -98,9 +98,11 @@ geo_st_geogfromtext_op, geo_st_geogpoint_op, geo_st_intersection_op, + geo_st_isclosed_op, geo_x_op, geo_y_op, GeoStDistanceOp, + GeoStLengthOp, ) from bigframes.operations.json_ops import ( JSONExtract, @@ -385,6 +387,8 @@ "geo_st_geogfromtext_op", "geo_st_geogpoint_op", "geo_st_intersection_op", + "geo_st_isclosed_op", + "GeoStLengthOp", "geo_x_op", "geo_y_op", "GeoStDistanceOp", diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index e3f15e67a1..1c321c0bf8 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -439,7 +439,6 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT return dtypes.INT_DTYPE -# TODO: Convert to NullaryWindowOp @dataclasses.dataclass(frozen=True) class RankOp(UnaryWindowOp): name: ClassVar[str] = "rank" @@ -456,7 +455,6 @@ def implicitly_inherits_order(self): return False -# TODO: Convert to NullaryWindowOp @dataclasses.dataclass(frozen=True) class DenseRankOp(UnaryWindowOp): @property diff --git a/bigframes/operations/ai.py b/bigframes/operations/ai.py index c65947f53f..87245d104e 100644 --- a/bigframes/operations/ai.py +++ b/bigframes/operations/ai.py @@ -16,7 +16,7 @@ import re import typing -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Sequence import warnings import numpy as np @@ -258,6 +258,101 @@ def extract_logprob(s: bigframes.series.Series) -> bigframes.series.Series: return concat([self._df, *attach_columns], axis=1) + def classify( + self, + instruction: str, + model, + labels: Sequence[str], + output_column: str = "result", + ground_with_google_search: bool = False, + attach_logprobs=False, + ): + """ + Classifies the rows of dataframes based on user instruction into the provided labels. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> bpd.options.experiments.ai_operators = True + >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 + + >>> import bigframes.ml.llm as llm + >>> model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001") + + >>> df = bpd.DataFrame({ + ... 
"feedback_text": [ + ... "The product is amazing, but the shipping was slow.", + ... "I had an issue with my recent bill.", + ... "The user interface is very intuitive." + ... ], + ... }) + >>> df.ai.classify("{feedback_text}", model=model, labels=["Shipping", "Billing", "UI"]) + feedback_text result + 0 The product is amazing, but the shipping was s... Shipping + 1 I had an issue with my recent bill. Billing + 2 The user interface is very intuitive. UI + + [3 rows x 2 columns] + + Args: + instruction (str): + An instruction on how to classify the data. This value must contain + column references by name, which should be wrapped in a pair of braces. + For example, if you have a column "feedback", you can refer to this column + with"{food}". + + model (bigframes.ml.llm.GeminiTextGenerator): + A GeminiTextGenerator provided by Bigframes ML package. + + labels (Sequence[str]): + A collection of labels (categories). It must contain at least two and at most 20 elements. + Labels are case sensitive. Duplicated labels are not allowed. + + output_column (str, default "result"): + The name of column for the output. + + ground_with_google_search (bool, default False): + Enables Grounding with Google Search for the GeminiTextGenerator model. + When set to True, the model incorporates relevant information from Google + Search results into its responses, enhancing their accuracy and factualness. + Note: Using this feature may impact billing costs. Refer to the pricing + page for details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models + The default is `False`. + + attach_logprobs (bool, default False): + Controls whether to attach an additional "logprob" column for each result. Logprobs are float-point values reflecting the confidence level + of the LLM for their responses. Higher values indicate more confidence. The value is in the range between negative infinite and 0. + + + Returns: + bigframes.pandas.DataFrame: DataFrame with classification result. + + Raises: + NotImplementedError: when the AI operator experiment is off. + ValueError: when the instruction refers to a non-existing column, when no + columns are referred to, or when the count of labels does not meet the + requirement. + """ + + if len(labels) < 2 or len(labels) > 20: + raise ValueError( + f"The number of labels should be between 2 and 20 (inclusive), but {len(labels)} labels are provided." 
+ ) + + if len(set(labels)) != len(labels): + raise ValueError("There are duplicate labels.") + + updated_instruction = f"Based on the user instruction {instruction}, you must provide an answer that must exist in the following list of labels: {labels}" + + return self.map( + updated_instruction, + model, + output_schema={output_column: "string"}, + ground_with_google_search=ground_with_google_search, + attach_logprobs=attach_logprobs, + ) + def join( self, other, diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index 8da88d1ff8..e143cfc519 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -15,7 +15,7 @@ from __future__ import annotations import os -from typing import cast, Optional, Union +from typing import cast, Literal, Optional, Union import warnings import IPython.display as ipy_display @@ -736,3 +736,77 @@ def pdf_chunk( return struct_series else: return content_series + + def audio_transcribe( + self, + *, + connection: Optional[str] = None, + model_name: Optional[ + Literal[ + "gemini-2.0-flash-001", + "gemini-2.0-flash-lite-001", + ] + ] = None, + verbose: bool = False, + ) -> bigframes.series.Series: + """ + Transcribe audio content using a Gemini multimodal model. + + Args: + connection (str or None, default None): BQ connection used for + function internet transactions, and the output blob if "dst" + is str. If None, uses default connection of the session. + model_name (str): The model for natural language tasks. Accepted + values are "gemini-2.0-flash-lite-001", and "gemini-2.0-flash-001". + See "https://ai.google.dev/gemini-api/docs/models" for model choices. + verbose (bool, default "False"): controls the verbosity of the output. + When set to True, both error messages and the transcribed content + are displayed. Conversely, when set to False, only the transcribed + content is presented, suppressing error messages. + + Returns: + bigframes.series.Series: str or struct[str, str], + depend on the "verbose" parameter. + Contains the transcribed text from the audio file. + Includes error messages if verbosity is enabled. + """ + import bigframes.bigquery as bbq + import bigframes.ml.llm as llm + import bigframes.pandas as bpd + + # col name doesn't matter here. Rename to avoid column name conflicts + audio_series = bigframes.series.Series(self._block) + + prompt_text = "**Task:** Transcribe the provided audio. **Instructions:** - Your response must contain only the verbatim transcription of the audio. - Do not include any introductory text, summaries, or conversational filler in your response. The output should begin directly with the first word of the audio." 
+ + llm_model = llm.GeminiTextGenerator( + model_name=model_name, + session=self._block.session, + connection_name=connection, + ) + + # transcribe audio using ML.GENERATE_TEXT + transcribed_results = llm_model.predict( + X=audio_series, + prompt=[prompt_text, audio_series], + temperature=0.0, + ) + + transcribed_content_series = cast( + bpd.Series, transcribed_results["ml_generate_text_llm_result"] + ).rename("transcribed_content") + + if verbose: + transcribed_status_series = cast( + bpd.Series, transcribed_results["ml_generate_text_status"] + ) + results_df = bpd.DataFrame( + { + "status": transcribed_status_series, + "content": transcribed_content_series, + } + ) + results_struct = bbq.struct(results_df).rename("transcription_results") + return results_struct + else: + return transcribed_content_series diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py index 98da9099cd..1b99e47ab1 100644 --- a/bigframes/operations/geo_ops.py +++ b/bigframes/operations/geo_ops.py @@ -54,6 +54,13 @@ name="geo_st_geogpoint", type_signature=op_typing.BinaryNumericGeo() ) +geo_st_isclosed_op = base_ops.create_unary_op( + name="geo_st_isclosed", + type_signature=op_typing.FixedOutputType( + dtypes.is_geo_like, dtypes.BOOL_DTYPE, description="geo-like" + ), +) + geo_x_op = base_ops.create_unary_op( name="geo_x", type_signature=op_typing.FixedOutputType( @@ -80,3 +87,12 @@ class GeoStDistanceOp(base_ops.BinaryOp): def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: return dtypes.FLOAT_DTYPE + + +@dataclasses.dataclass(frozen=True) +class GeoStLengthOp(base_ops.UnaryOp): + name = "geo_st_length" + use_spheroid: bool = False + + def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: + return dtypes.FLOAT_DTYPE diff --git a/bigframes/series.py b/bigframes/series.py index 74e8d03c8d..1bb0c1e0dc 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -960,6 +960,10 @@ def peek( as_series.name = self.name return as_series + def item(self): + # Docstring is in third_party/bigframes_vendored/pandas/core/series.py + return self.peek(2).item() + def nlargest(self, n: int = 5, keep: str = "first") -> Series: if keep not in ("first", "last", "all"): raise ValueError("'keep must be one of 'first', 'last', or 'all'") @@ -979,8 +983,10 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: ) def isin(self, values) -> "Series" | None: - if isinstance(values, (Series,)): + if isinstance(values, Series): return Series(self._block.isin(values._block)) + if isinstance(values, indexes.Index): + return Series(self._block.isin(values.to_series()._block)) if not _is_list_like(values): raise TypeError( "only list-like objects are allowed to be passed to " diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index c24dca554a..ab09230c99 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -537,6 +537,10 @@ def _read_gbq_colab( index_col=bigframes.enums.DefaultIndexKind.NULL, force_total_order=False, dry_run=typing.cast(Union[Literal[False], Literal[True]], dry_run), + # TODO(tswast): we may need to allow allow_large_results to be overwritten + # or possibly a general configuration object for an explicit + # destination table and write disposition. 
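+            # _read_gbq_colab targets small, interactive result sets, so we
+            # default to the jobless query path here.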
+ allow_large_results=False, ) @overload @@ -1166,7 +1170,11 @@ def _read_csv_w_bigquery_engine( table_id = self._loader.load_file(filepath_or_buffer, job_config=job_config) df = self._loader.read_gbq_table( - table_id, index_col=index_col, columns=columns, names=names + table_id, + index_col=index_col, + columns=columns, + names=names, + index_col_in_columns=True, ) if dtype is not None: @@ -1917,10 +1925,15 @@ def _start_query_ml_ddl( # https://cloud.google.com/bigquery/docs/customer-managed-encryption#encrypt-model job_config.destination_encryption_configuration = None iterator, query_job = bf_io_bigquery.start_query_with_client( - self.bqclient, sql, job_config=job_config, metrics=self._metrics + self.bqclient, + sql, + job_config=job_config, + metrics=self._metrics, + location=None, + project=None, + timeout=None, + query_with_job=True, ) - - assert query_job is not None return iterator, query_job def _create_object_table(self, path: str, connection: str) -> str: @@ -1943,6 +1956,10 @@ def _create_object_table(self, path: str, connection: str) -> str: sql, job_config=bigquery.QueryJobConfig(), metrics=self._metrics, + location=None, + project=None, + timeout=None, + query_with_job=True, ) return table diff --git a/bigframes/session/_io/bigquery/__init__.py b/bigframes/session/_io/bigquery/__init__.py index 267111afe0..fdc240fa69 100644 --- a/bigframes/session/_io/bigquery/__init__.py +++ b/bigframes/session/_io/bigquery/__init__.py @@ -22,7 +22,7 @@ import textwrap import types import typing -from typing import Dict, Iterable, Mapping, Optional, Tuple, Union +from typing import Dict, Iterable, Literal, Mapping, Optional, overload, Tuple, Union import bigframes_vendored.pandas.io.gbq as third_party_pandas_gbq import google.api_core.exceptions @@ -38,7 +38,6 @@ IO_ORDERING_ID = "bqdf_row_nums" -MAX_LABELS_COUNT = 64 - 8 _LIST_TABLES_LIMIT = 10000 # calls to bqclient.list_tables # will be limited to this many tables @@ -73,7 +72,12 @@ def create_job_configs_labels( ) ) values = list(itertools.chain(job_configs_labels.values(), api_methods)) - return dict(zip(labels[:MAX_LABELS_COUNT], values[:MAX_LABELS_COUNT])) + return dict( + zip( + labels[: log_adapter.MAX_LABELS_COUNT], + values[: log_adapter.MAX_LABELS_COUNT], + ) + ) def create_export_data_statement( @@ -223,8 +227,7 @@ def format_option(key: str, value: Union[bool, str]) -> str: def add_and_trim_labels(job_config): """ Add additional labels to the job configuration and trim the total number of labels - to ensure they do not exceed the maximum limit allowed by BigQuery, which is 64 - labels per job. + to ensure they do not exceed MAX_LABELS_COUNT labels per job. """ api_methods = log_adapter.get_and_reset_api_methods(dry_run=job_config.dry_run) job_config.labels = create_job_configs_labels( @@ -233,23 +236,54 @@ def add_and_trim_labels(job_config): ) +@overload def start_query_with_client( bq_client: bigquery.Client, sql: str, - job_config: bigquery.job.QueryJobConfig, + *, + job_config: bigquery.QueryJobConfig, + location: Optional[str], + project: Optional[str], + timeout: Optional[float], + metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None, + query_with_job: Literal[True], +) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]: + ... 
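+# With query_with_job=True, the QueryJob in the returned tuple is guaranteed
+# to be present; the Literal[False] overload below may return None for the job.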
+ + +@overload +def start_query_with_client( + bq_client: bigquery.Client, + sql: str, + *, + job_config: bigquery.QueryJobConfig, + location: Optional[str], + project: Optional[str], + timeout: Optional[float], + metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None, + query_with_job: Literal[False], +) -> Tuple[bigquery.table.RowIterator, Optional[bigquery.QueryJob]]: + ... + + +def start_query_with_client( + bq_client: bigquery.Client, + sql: str, + *, + job_config: bigquery.QueryJobConfig, location: Optional[str] = None, project: Optional[str] = None, timeout: Optional[float] = None, metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None, - *, query_with_job: bool = True, ) -> Tuple[bigquery.table.RowIterator, Optional[bigquery.QueryJob]]: """ Starts query job and waits for results. """ try: - # Note: Ensure no additional labels are added to job_config after this point, - # as `add_and_trim_labels` ensures the label count does not exceed 64. + # Note: Ensure no additional labels are added to job_config after this + # point, as `add_and_trim_labels` ensures the label count does not + # exceed MAX_LABELS_COUNT. add_and_trim_labels(job_config) if not query_with_job: results_iterator = bq_client.query_and_wait( @@ -322,8 +356,8 @@ def delete_tables_matching_session_id( def create_bq_dataset_reference( bq_client: bigquery.Client, - location=None, - project=None, + location: Optional[str] = None, + project: Optional[str] = None, ) -> bigquery.DatasetReference: """Create and identify dataset(s) for temporary BQ resources. @@ -352,6 +386,9 @@ def create_bq_dataset_reference( location=location, job_config=job_config, project=project, + timeout=None, + metrics=None, + query_with_job=True, ) # The anonymous dataset is used by BigQuery to write query results and @@ -359,7 +396,6 @@ def create_bq_dataset_reference( # to the dataset, no BigQuery Session required. Note: there is a # different anonymous dataset per location. See: # https://cloud.google.com/bigquery/docs/cached-results#how_cached_results_are_stored - assert query_job is not None query_destination = query_job.destination return bigquery.DatasetReference( query_destination.project, diff --git a/bigframes/session/_io/bigquery/read_gbq_query.py b/bigframes/session/_io/bigquery/read_gbq_query.py new file mode 100644 index 0000000000..70c83d7875 --- /dev/null +++ b/bigframes/session/_io/bigquery/read_gbq_query.py @@ -0,0 +1,90 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Private helpers for implementing read_gbq_query."""
+
+from __future__ import annotations
+
+from typing import Optional
+
+from google.cloud import bigquery
+import google.cloud.bigquery.table
+import pandas
+
+from bigframes import dataframe
+from bigframes.core import local_data, pyarrow_utils
+import bigframes.core as core
+import bigframes.core.blocks as blocks
+import bigframes.core.guid
+import bigframes.core.schema as schemata
+import bigframes.session
+
+
+def create_dataframe_from_query_job_stats(
+    query_job: Optional[bigquery.QueryJob], *, session: bigframes.session.Session
+) -> dataframe.DataFrame:
+    """Convert a QueryJob into a DataFrame with key statistics about the query.
+
+    If you change anything here, please try to keep it in sync with pandas-gbq.
+    """
+    return dataframe.DataFrame(
+        data=pandas.DataFrame(
+            {
+                "statement_type": [
+                    query_job.statement_type if query_job else "unknown"
+                ],
+                "job_id": [query_job.job_id if query_job else "unknown"],
+                "location": [query_job.location if query_job else "unknown"],
+            }
+        ),
+        session=session,
+    )
+
+
+def create_dataframe_from_row_iterator(
+    rows: google.cloud.bigquery.table.RowIterator, *, session: bigframes.session.Session
+) -> dataframe.DataFrame:
+    """Convert a RowIterator into a DataFrame wrapping a LocalNode.
+
+    This allows us to create a DataFrame from query results, even in the
+    'jobless' case where there's no destination table.
+    """
+    pa_table = rows.to_arrow()
+
+    # TODO(tswast): Use array_value.promote_offsets() instead once that node is
+    # supported by the local engine.
+    offsets_col = bigframes.core.guid.generate_guid()
+    pa_table = pyarrow_utils.append_offsets(pa_table, offsets_col=offsets_col)
+
+    # We use the ManagedArrowTable constructor directly, because the
+    # results of to_arrow() should be the source of truth with regard
+    # to canonical formats, since it comes from either the BQ Storage
+    # Read API or has been transformed by google-cloud-bigquery to look
+    # like the output of the BQ Storage Read API.
+    mat = local_data.ManagedArrowTable(
+        pa_table,
+        schemata.ArraySchema.from_bq_schema(
+            list(rows.schema) + [bigquery.SchemaField(offsets_col, "INTEGER")]
+        ),
+    )
+    mat.validate()
+
+    array_value = core.ArrayValue.from_managed(mat, session)
+    block = blocks.Block(
+        array_value,
+        (offsets_col,),
+        [field.name for field in rows.schema],
+        (None,),
+    )
+    return dataframe.DataFrame(block)
diff --git a/bigframes/session/bq_caching_executor.py b/bigframes/session/bq_caching_executor.py
index 33d3314a1e..47be6fa768 100644
--- a/bigframes/session/bq_caching_executor.py
+++ b/bigframes/session/bq_caching_executor.py
@@ -320,6 +320,10 @@ def export_gcs(
             export_data_statement,
             job_config=bigquery.QueryJobConfig(),
             metrics=self.metrics,
+            project=None,
+            location=None,
+            timeout=None,
+            query_with_job=True,
         )
         return query_job
 
@@ -383,14 +387,29 @@ def _run_execute_query(
             job_config.labels["bigframes-mode"] = "unordered"
 
         try:
-            iterator, query_job = bq_io.start_query_with_client(
-                self.bqclient,
-                sql,
-                job_config=job_config,
-                metrics=self.metrics,
-                query_with_job=query_with_job,
-            )
-            return iterator, query_job
+            # Trick the type checker into thinking we got a literal.
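+            # query_with_job is typed as a plain bool here, so without this
+            # branch the type checker cannot select between the Literal[True]
+            # and Literal[False] overloads of start_query_with_client.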
+            if query_with_job:
+                return bq_io.start_query_with_client(
+                    self.bqclient,
+                    sql,
+                    job_config=job_config,
+                    metrics=self.metrics,
+                    project=None,
+                    location=None,
+                    timeout=None,
+                    query_with_job=True,
+                )
+            else:
+                return bq_io.start_query_with_client(
+                    self.bqclient,
+                    sql,
+                    job_config=job_config,
+                    metrics=self.metrics,
+                    project=None,
+                    location=None,
+                    timeout=None,
+                    query_with_job=False,
+                )
 
         except google.api_core.exceptions.BadRequest as e:
             # Unfortunately, this error type does not have a separate error code or exception type
diff --git a/bigframes/session/direct_gbq_execution.py b/bigframes/session/direct_gbq_execution.py
new file mode 100644
index 0000000000..4b19f7441d
--- /dev/null
+++ b/bigframes/session/direct_gbq_execution.py
@@ -0,0 +1,76 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from typing import Optional, Tuple
+
+from google.cloud import bigquery
+import google.cloud.bigquery.job as bq_job
+import google.cloud.bigquery.table as bq_table
+
+from bigframes.core import compile, nodes
+from bigframes.session import executor, semi_executor
+import bigframes.session._io.bigquery as bq_io
+
+
+# Used only in testing right now; BigQueryCachingExecutor is the fully featured
+# engine. This executor is simplified: it does not handle large (>10 GB) query
+# results, error handling, the global config, or metrics recording. It also
+# avoids caching and most pre-compile rewrites, to better serve as a reference
+# for validating more complex executors.
+class DirectGbqExecutor(semi_executor.SemiExecutor):
+    def __init__(self, bqclient: bigquery.Client):
+        self.bqclient = bqclient
+
+    def execute(
+        self,
+        plan: nodes.BigFrameNode,
+        ordered: bool,
+        peek: Optional[int] = None,
+    ) -> executor.ExecuteResult:
+        """Execute the plan as-is, without further caching or decomposition."""
+        # TODO(swast): plumb through the api_name of the user-facing api that
+        # caused this query.
+
+        compiled = compile.compile_sql(
+            compile.CompileRequest(plan, sort_rows=ordered, peek_count=peek)
+        )
+        iterator, query_job = self._run_execute_query(
+            sql=compiled.sql,
+        )
+
+        return executor.ExecuteResult(
+            arrow_batches=iterator.to_arrow_iterable(),
+            schema=plan.schema,
+            query_job=query_job,
+            total_rows=iterator.total_rows,
+        )
+
+    def _run_execute_query(
+        self,
+        sql: str,
+        job_config: Optional[bq_job.QueryJobConfig] = None,
+    ) -> Tuple[bq_table.RowIterator, Optional[bigquery.QueryJob]]:
+        """
+        Starts BigQuery query job and waits for results.
+        
+ """ + return bq_io.start_query_with_client( + self.bqclient, + sql, + job_config=job_config or bq_job.QueryJobConfig(), + project=None, + location=None, + timeout=None, + metrics=None, + query_with_job=False, + ) diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index ba669a62bb..814d44292e 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -22,6 +22,7 @@ import os import typing from typing import ( + cast, Dict, Generator, Hashable, @@ -39,6 +40,7 @@ import bigframes_vendored.pandas.io.gbq as third_party_pandas_gbq import google.api_core.exceptions from google.cloud import bigquery_storage_v1 +import google.cloud.bigquery import google.cloud.bigquery as bigquery from google.cloud.bigquery_storage_v1 import types as bq_storage_types import pandas @@ -52,6 +54,7 @@ import bigframes.formatting_helpers as formatting_helpers from bigframes.session import dry_runs import bigframes.session._io.bigquery as bf_io_bigquery +import bigframes.session._io.bigquery.read_gbq_query as bf_read_gbq_query import bigframes.session._io.bigquery.read_gbq_table as bf_read_gbq_table import bigframes.session.metrics import bigframes.session.temporary_storage @@ -93,7 +96,31 @@ def _to_index_cols( return index_cols -def _check_column_duplicates(index_cols: Iterable[str], columns: Iterable[str]): +def _check_column_duplicates( + index_cols: Iterable[str], columns: Iterable[str], index_col_in_columns: bool +) -> Iterable[str]: + """Validates and processes index and data columns for duplicates and overlap. + + This function performs two main tasks: + 1. Ensures there are no duplicate column names within the `index_cols` list + or within the `columns` list. + 2. Based on the `index_col_in_columns` flag, it validates the relationship + between `index_cols` and `columns`. + + Args: + index_cols (Iterable[str]): + An iterable of column names designated as the index. + columns (Iterable[str]): + An iterable of column names designated as the data columns. + index_col_in_columns (bool): + A flag indicating how to handle overlap between `index_cols` and + `columns`. + - If `False`, the two lists must be disjoint (contain no common + elements). An error is raised if any overlap is found. + - If `True`, `index_cols` is expected to be a subset of + `columns`. An error is raised if an index column is not found + in the `columns` list. + """ index_cols_list = list(index_cols) if index_cols is not None else [] columns_list = list(columns) if columns is not None else [] set_index = set(index_cols_list) @@ -105,17 +132,29 @@ def _check_column_duplicates(index_cols: Iterable[str], columns: Iterable[str]): "All column names specified in 'index_col' must be unique." ) + if len(columns_list) == 0: + return columns + if len(columns_list) > len(set_columns): raise ValueError( "The 'columns' argument contains duplicate names. " "All column names specified in 'columns' must be unique." ) - if not set_index.isdisjoint(set_columns): - raise ValueError( - "Found column names that exist in both 'index_col' and 'columns' arguments. " - "These arguments must specify distinct sets of columns." - ) + if index_col_in_columns: + if not set_index.issubset(set_columns): + raise ValueError( + f"The specified index column(s) were not found: {set_index - set_columns}. 
" + f"Available columns are: {set_columns}" + ) + return [col for col in columns if col not in set_index] + else: + if not set_index.isdisjoint(set_columns): + raise ValueError( + "Found column names that exist in both 'index_col' and 'columns' arguments. " + "These arguments must specify distinct sets of columns." + ) + return columns @dataclasses.dataclass @@ -388,6 +427,7 @@ def read_gbq_table( # type: ignore[overload-overlap] dry_run: Literal[False] = ..., force_total_order: Optional[bool] = ..., n_rows: Optional[int] = None, + index_col_in_columns: bool = False, ) -> dataframe.DataFrame: ... @@ -410,6 +450,7 @@ def read_gbq_table( dry_run: Literal[True] = ..., force_total_order: Optional[bool] = ..., n_rows: Optional[int] = None, + index_col_in_columns: bool = False, ) -> pandas.Series: ... @@ -431,7 +472,67 @@ def read_gbq_table( dry_run: bool = False, force_total_order: Optional[bool] = None, n_rows: Optional[int] = None, + index_col_in_columns: bool = False, ) -> dataframe.DataFrame | pandas.Series: + """Read a BigQuery table into a BigQuery DataFrames DataFrame. + + This method allows you to create a DataFrame from a BigQuery table. + You can specify the columns to load, an index column, and apply + filters. + + Args: + table_id (str): + The identifier of the BigQuery table to read. + index_col (Iterable[str] | str | Iterable[int] | int | bigframes.enums.DefaultIndexKind, optional): + The column(s) to use as the index for the DataFrame. This can be + a single column name or a list of column names. If not provided, + a default index will be used based on the session's + ``default_index_type``. + columns (Iterable[str], optional): + The columns to read from the table. If not specified, all + columns will be read. + names (Optional[Iterable[str]], optional): + A list of column names to use for the resulting DataFrame. This + is useful if you want to rename the columns as you read the + data. + max_results (Optional[int], optional): + The maximum number of rows to retrieve from the table. If not + specified, all rows will be loaded. + use_cache (bool, optional): + Whether to use cached results for the query. Defaults to True. + Setting this to False will force a re-execution of the query. + filters (third_party_pandas_gbq.FiltersType, optional): + A list of filters to apply to the data. Filters are specified + as a list of tuples, where each tuple contains a column name, + an operator (e.g., '==', '!='), and a value. + enable_snapshot (bool, optional): + If True, a snapshot of the table is used to ensure that the + DataFrame is deterministic, even if the underlying table + changes. Defaults to True. + dry_run (bool, optional): + If True, the function will not actually execute the query but + will instead return statistics about the table. Defaults to False. + force_total_order (Optional[bool], optional): + If True, a total ordering is enforced on the DataFrame, which + can be useful for operations that require a stable row order. + If None, the session's default behavior is used. + n_rows (Optional[int], optional): + The number of rows to consider for type inference and other + metadata operations. This does not limit the number of rows + in the final DataFrame. + index_col_in_columns (bool, optional): + Specifies if the ``index_col`` is also present in the ``columns`` + list. Defaults to ``False``. + + * If ``False``, ``index_col`` and ``columns`` must specify + distinct sets of columns. An error will be raised if any + column is found in both. 
+ * If ``True``, the column(s) in ``index_col`` are expected to + also be present in the ``columns`` list. This is useful + when the index is selected from the data columns (e.g., in a + ``read_csv`` scenario). The column will be used as the + DataFrame's index and removed from the list of value columns. + """ import bigframes._tools.strings import bigframes.dataframe as dataframe @@ -513,7 +614,9 @@ def read_gbq_table( index_col=index_col, names=names, ) - _check_column_duplicates(index_cols, columns) + columns = list( + _check_column_duplicates(index_cols, columns, index_col_in_columns) + ) for key in index_cols: if key not in table_column_names: @@ -736,6 +839,7 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., force_total_order: Optional[bool] = ..., + allow_large_results: bool = ..., ) -> dataframe.DataFrame: ... @@ -752,6 +856,7 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., force_total_order: Optional[bool] = ..., + allow_large_results: bool = ..., ) -> pandas.Series: ... @@ -767,9 +872,8 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = (), dry_run: bool = False, force_total_order: Optional[bool] = None, + allow_large_results: bool = True, ) -> dataframe.DataFrame | pandas.Series: - import bigframes.dataframe as dataframe - configuration = _transform_read_gbq_configuration(configuration) if "query" not in configuration: @@ -794,7 +898,9 @@ def read_gbq_query( ) index_cols = _to_index_cols(index_col) - _check_column_duplicates(index_cols, columns) + columns = _check_column_duplicates( + index_cols, columns, index_col_in_columns=False + ) filters_copy1, filters_copy2 = itertools.tee(filters) has_filters = len(list(filters_copy1)) != 0 @@ -824,29 +930,72 @@ def read_gbq_query( query_job, list(columns), index_cols ) - # No cluster candidates as user query might not be clusterable (eg because of ORDER BY clause) - destination, query_job = self._query_to_destination( - query, - cluster_candidates=[], - configuration=configuration, - ) + query_job_for_metrics: Optional[bigquery.QueryJob] = None + destination: Optional[bigquery.TableReference] = None + # TODO(b/421161077): If an explicit destination table is set in + # configuration, should we respect that setting? + if allow_large_results: + destination, query_job = self._query_to_destination( + query, + # No cluster candidates as user query might not be clusterable + # (eg because of ORDER BY clause) + cluster_candidates=[], + configuration=configuration, + ) + query_job_for_metrics = query_job + rows = None + else: + job_config = typing.cast( + bigquery.QueryJobConfig, + bigquery.QueryJobConfig.from_api_repr(configuration), + ) + + # TODO(b/420984164): We may want to set a page_size here to limit + # the number of results in the first jobs.query response. + rows = self._start_query_with_job_optional( + query, + job_config=job_config, + ) + + # If there is a query job, fetch it so that we can get the + # statistics and destination table, if needed. + if rows.job_id and rows.location and rows.project: + query_job = cast( + bigquery.QueryJob, + self._bqclient.get_job( + rows.job_id, project=rows.project, location=rows.location + ), + ) + destination = query_job.destination + query_job_for_metrics = query_job + + # We split query execution from results fetching so that we can log + # metrics from either the query job, row iterator, or both. 
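+        # On the allow_large_results path only query_job_for_metrics is set;
+        # on the jobless path, rows is always set and the job may be absent.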
if self._metrics is not None: - self._metrics.count_job_stats(query_job) + self._metrics.count_job_stats( + query_job=query_job_for_metrics, row_iterator=rows + ) - # If there was no destination table, that means the query must have - # been DDL or DML. Return some job metadata, instead. + # It's possible that there's no job and corresponding destination table. + # In this case, we must create a local node. + # + # TODO(b/420984164): Tune the threshold for which we download to + # local node. Likely there are a wide range of sizes in which it + # makes sense to download the results beyond the first page, even if + # there is a job and destination table available. + if rows is not None and destination is None: + return bf_read_gbq_query.create_dataframe_from_row_iterator( + rows, + session=self._session, + ) + + # If there was no destination table and we've made it this far, that + # means the query must have been DDL or DML. Return some job metadata, + # instead. if not destination: - return dataframe.DataFrame( - data=pandas.DataFrame( - { - "statement_type": [ - query_job.statement_type if query_job else "unknown" - ], - "job_id": [query_job.job_id if query_job else "unknown"], - "location": [query_job.location if query_job else "unknown"], - } - ), + return bf_read_gbq_query.create_dataframe_from_query_job_stats( + query_job_for_metrics, session=self._session, ) @@ -872,9 +1021,12 @@ def _query_to_destination( # bother trying to do a CREATE TEMP TABLE ... AS SELECT ... statement. dry_run_config = bigquery.QueryJobConfig() dry_run_config.dry_run = True - _, dry_run_job = self._start_query(query, job_config=dry_run_config) + dry_run_job = self._start_query_with_job( + query, + job_config=dry_run_config, + ) if dry_run_job.statement_type != "SELECT": - _, query_job = self._start_query(query) + query_job = self._start_query_with_job(query) return query_job.destination, query_job # Create a table to workaround BigQuery 10 GB query results limit. See: @@ -908,7 +1060,7 @@ def _query_to_destination( # Write to temp table to workaround BigQuery 10 GB query results # limit. See: internal issue 303057336. job_config.labels["error_caught"] = "true" - _, query_job = self._start_query( + query_job = self._start_query_with_job( query, job_config=job_config, timeout=timeout, @@ -919,34 +1071,72 @@ def _query_to_destination( # tables as the destination. For example, if the query has a # top-level ORDER BY, this conflicts with our ability to cluster # the table by the index column(s). - _, query_job = self._start_query(query, timeout=timeout) + query_job = self._start_query_with_job(query, timeout=timeout) return query_job.destination, query_job - def _start_query( + def _prepare_job_config( + self, + job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, + ) -> google.cloud.bigquery.QueryJobConfig: + job_config = bigquery.QueryJobConfig() if job_config is None else job_config + + if bigframes.options.compute.maximum_bytes_billed is not None: + # Maybe this should be pushed down into start_query_with_client + job_config.maximum_bytes_billed = ( + bigframes.options.compute.maximum_bytes_billed + ) + + return job_config + + def _start_query_with_job_optional( + self, + sql: str, + *, + job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, + timeout: Optional[float] = None, + ) -> google.cloud.bigquery.table.RowIterator: + """ + Starts BigQuery query with job optional and waits for results. + + Do not execute dataframe through this API, instead use the executor. 
+ """ + job_config = self._prepare_job_config(job_config) + rows, _ = bf_io_bigquery.start_query_with_client( + self._bqclient, + sql, + job_config=job_config, + timeout=timeout, + location=None, + project=None, + metrics=None, + query_with_job=False, + ) + return rows + + def _start_query_with_job( self, sql: str, + *, job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, timeout: Optional[float] = None, - ) -> Tuple[google.cloud.bigquery.table.RowIterator, bigquery.QueryJob]: + ) -> bigquery.QueryJob: """ Starts BigQuery query job and waits for results. Do not execute dataframe through this API, instead use the executor. """ - job_config = bigquery.QueryJobConfig() if job_config is None else job_config - if bigframes.options.compute.maximum_bytes_billed is not None: - # Maybe this should be pushed down into start_query_with_client - job_config.maximum_bytes_billed = ( - bigframes.options.compute.maximum_bytes_billed - ) - iterator, query_job = bf_io_bigquery.start_query_with_client( + job_config = self._prepare_job_config(job_config) + _, query_job = bf_io_bigquery.start_query_with_client( self._bqclient, sql, job_config=job_config, timeout=timeout, + location=None, + project=None, + metrics=None, + query_with_job=True, ) - assert query_job is not None - return iterator, query_job + return query_job def _transform_read_gbq_configuration(configuration: Optional[dict]) -> dict: diff --git a/bigframes/session/local_scan_executor.py b/bigframes/session/local_scan_executor.py index 88304fa181..b4d7b226e2 100644 --- a/bigframes/session/local_scan_executor.py +++ b/bigframes/session/local_scan_executor.py @@ -30,11 +30,17 @@ def execute( ordered: bool, peek: Optional[int] = None, ) -> Optional[executor.ExecuteResult]: - node = rewrite.try_reduce_to_local_scan(plan) - if not node: + reduced_result = rewrite.try_reduce_to_local_scan(plan) + if not reduced_result: return None - # TODO: Can support some slicing, sorting + node, limit = reduced_result + + if limit is not None: + if peek is None or limit < peek: + peek = limit + + # TODO: Can support some sorting offsets_col = node.offsets_col.sql if (node.offsets_col is not None) else None arrow_table = node.local_data_source.to_pyarrow_table(offsets_col=offsets_col) if peek: @@ -46,8 +52,8 @@ def execute( arrow_table = arrow_table.select(needed_cols) arrow_table = arrow_table.rename_columns([id.sql for id in node.ids]) - total_rows = node.row_count + if (peek is not None) and (total_rows is not None): total_rows = min(peek, total_rows) diff --git a/bigframes/session/metrics.py b/bigframes/session/metrics.py index 6a8038e189..48cb92a8b4 100644 --- a/bigframes/session/metrics.py +++ b/bigframes/session/metrics.py @@ -79,17 +79,24 @@ def get_performance_stats( return None bytes_processed = query_job.total_bytes_processed - if not isinstance(bytes_processed, int): + if bytes_processed and not isinstance(bytes_processed, int): return None # filter out mocks slot_millis = query_job.slot_millis - if not isinstance(slot_millis, int): + if slot_millis and not isinstance(slot_millis, int): return None # filter out mocks execution_secs = (query_job.ended - query_job.created).total_seconds() query_char_count = len(query_job.query) - return query_char_count, bytes_processed, slot_millis, execution_secs + return ( + query_char_count, + # Not every job populates these. For example, slot_millis is missing + # from queries that came from cached results. 
+ bytes_processed if bytes_processed else 0, + slot_millis if slot_millis else 0, + execution_secs, + ) def write_stats_to_disk( diff --git a/bigframes/session/polars_executor.py b/bigframes/session/polars_executor.py new file mode 100644 index 0000000000..e215866874 --- /dev/null +++ b/bigframes/session/polars_executor.py @@ -0,0 +1,80 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +from typing import Optional, TYPE_CHECKING + +import pyarrow as pa + +from bigframes.core import array_value, bigframe_node, local_data, nodes +from bigframes.session import executor, semi_executor + +if TYPE_CHECKING: + import polars as pl + + +_COMPATIBLE_NODES = ( + nodes.ReadLocalNode, + nodes.OrderByNode, + nodes.ReversedNode, + nodes.SelectionNode, + nodes.FilterNode, # partial support + nodes.ProjectionNode, # partial support +) + + +class PolarsExecutor(semi_executor.SemiExecutor): + def __init__(self): + # This will error out if polars is not installed + from bigframes.core.compile.polars import PolarsCompiler + + self._compiler = PolarsCompiler() + + def execute( + self, + plan: bigframe_node.BigFrameNode, + ordered: bool, + peek: Optional[int] = None, + ) -> Optional[executor.ExecuteResult]: + if not self._can_execute(plan): + return None + # Note: Ignoring ordered flag, as just executing totally ordered is fine. 
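+        # Compilation may fail on expressions the polars compiler does not
+        # support yet; returning None lets the caller fall back to another
+        # executor.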
+ try: + lazy_frame: pl.LazyFrame = self._compiler.compile( + array_value.ArrayValue(plan) + ) + except Exception: + return None + if peek is not None: + lazy_frame = lazy_frame.limit(peek) + pa_table = lazy_frame.collect().to_arrow() + return executor.ExecuteResult( + arrow_batches=iter(map(self._adapt_batch, pa_table.to_batches())), + schema=plan.schema, + total_bytes=pa_table.nbytes, + total_rows=pa_table.num_rows, + ) + + def _can_execute(self, plan: bigframe_node.BigFrameNode): + return all(isinstance(node, _COMPATIBLE_NODES) for node in plan.unique_nodes()) + + def _adapt_array(self, array: pa.Array) -> pa.Array: + target_type = local_data.logical_type_replacements(array.type) + if target_type != array.type: + return array.cast(target_type) + return array + + def _adapt_batch(self, batch: pa.RecordBatch) -> pa.RecordBatch: + new_arrays = [self._adapt_array(arr) for arr in batch.columns] + return pa.RecordBatch.from_arrays(new_arrays, names=batch.column_names) diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py index ca6fa57d0b..7ddc2e2e6e 100644 --- a/bigframes/testing/mocks.py +++ b/bigframes/testing/mocks.py @@ -14,11 +14,14 @@ import copy import datetime -from typing import Any, Dict, Optional, Sequence +from typing import Any, Dict, Literal, Optional, Sequence import unittest.mock as mock +from bigframes_vendored.google_cloud_bigquery import _pandas_helpers import google.auth.credentials import google.cloud.bigquery +import google.cloud.bigquery.table +import pyarrow import pytest import bigframes @@ -40,6 +43,7 @@ def create_bigquery_session( table_schema: Sequence[google.cloud.bigquery.SchemaField] = TEST_SCHEMA, anonymous_dataset: Optional[google.cloud.bigquery.DatasetReference] = None, location: str = "test-region", + ordering_mode: Literal["strict", "partial"] = "partial", ) -> bigframes.Session: """[Experimental] Create a mock BigQuery DataFrames session that avoids making Google Cloud API calls. 
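
A small, hypothetical sketch of how the mocked session above might be used in a unit test; it assumes only `create_bigquery_session` and the new `ordering_mode` parameter visible in this diff, so treat it as illustration rather than a test from the repository:

```python
# Hypothetical usage sketch (not part of this change).
from bigframes.testing import mocks


def test_constructs_session_without_network_calls():
    # The BigQuery clients inside are autospec'd mocks, so constructing the
    # session performs no Google Cloud API calls.
    session = mocks.create_bigquery_session(ordering_mode="strict")
    assert session.bqclient is not None
```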
@@ -79,43 +83,75 @@ def create_bigquery_session( queries = [] job_configs = [] - def query_mock(query, *args, job_config=None, **kwargs): + def query_mock( + query, + *args, + job_config: Optional[google.cloud.bigquery.QueryJobConfig] = None, + **kwargs, + ): queries.append(query) job_configs.append(copy.deepcopy(job_config)) - query_job = mock.create_autospec(google.cloud.bigquery.QueryJob) + query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True) query_job._properties = {} type(query_job).destination = mock.PropertyMock( return_value=anonymous_dataset.table("test_table"), ) - type(query_job).session_info = google.cloud.bigquery.SessionInfo( - {"sessionInfo": {"sessionId": session_id}}, - ) + type(query_job).statement_type = mock.PropertyMock(return_value="SELECT") + + if job_config is not None and job_config.create_session: + type(query_job).session_info = google.cloud.bigquery.SessionInfo( + {"sessionId": session_id}, + ) if query.startswith("SELECT CURRENT_TIMESTAMP()"): query_job.result = mock.MagicMock(return_value=[[bq_time]]) + elif "CREATE TEMP TABLE".casefold() in query.casefold(): + type(query_job).destination = mock.PropertyMock( + return_value=anonymous_dataset.table("temp_table_from_session"), + ) else: type(query_job).schema = mock.PropertyMock(return_value=table_schema) return query_job - existing_query_and_wait = bqclient.query_and_wait - def query_and_wait_mock(query, *args, job_config=None, **kwargs): queries.append(query) job_configs.append(copy.deepcopy(job_config)) + if query.startswith("SELECT CURRENT_TIMESTAMP()"): return iter([[datetime.datetime.now()]]) - else: - return existing_query_and_wait(query, *args, **kwargs) - bqclient.query = query_mock - bqclient.query_and_wait = query_and_wait_mock + rows = mock.create_autospec( + google.cloud.bigquery.table.RowIterator, instance=True + ) + row = mock.create_autospec(google.cloud.bigquery.table.Row, instance=True) + rows.__iter__.return_value = [row] + type(rows).schema = mock.PropertyMock(return_value=table_schema) + rows.to_arrow.return_value = pyarrow.Table.from_pydict( + {field.name: [None] for field in table_schema}, + schema=pyarrow.schema( + _pandas_helpers.bq_to_arrow_field(field) for field in table_schema + ), + ) + + if job_config is not None and job_config.destination is None: + # Assume that the query finishes fast enough for jobless mode. 
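+                # google-cloud-bigquery surfaces job_id=None on the RowIterator
+                # when a query completes without creating a job.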
+ type(rows).job_id = mock.PropertyMock(return_value=None) + + return rows + + bqclient.query.side_effect = query_mock + bqclient.query_and_wait.side_effect = query_and_wait_mock clients_provider = mock.create_autospec(bigframes.session.clients.ClientsProvider) type(clients_provider).bqclient = mock.PropertyMock(return_value=bqclient) clients_provider._credentials = credentials - bqoptions = bigframes.BigQueryOptions(credentials=credentials, location=location) + bqoptions = bigframes.BigQueryOptions( + credentials=credentials, + location=location, + ordering_mode=ordering_mode, + ) session = bigframes.Session(context=bqoptions, clients_provider=clients_provider) session._bq_connection_manager = mock.create_autospec( bigframes.clients.BqConnectionManager, instance=True diff --git a/bigframes/testing/polars_session.py b/bigframes/testing/polars_session.py index f8dda8da55..5e5de2d0b2 100644 --- a/bigframes/testing/polars_session.py +++ b/bigframes/testing/polars_session.py @@ -20,12 +20,9 @@ import polars import bigframes -import bigframes.clients import bigframes.core.blocks import bigframes.core.compile.polars -import bigframes.core.ordering import bigframes.dataframe -import bigframes.session.clients import bigframes.session.executor import bigframes.session.metrics @@ -35,6 +32,26 @@ class TestExecutor(bigframes.session.executor.Executor): compiler = bigframes.core.compile.polars.PolarsCompiler() + def peek( + self, + array_value: bigframes.core.ArrayValue, + n_rows: int, + use_explicit_destination: Optional[bool] = False, + ): + """ + A 'peek' efficiently accesses a small number of rows in the dataframe. + """ + lazy_frame: polars.LazyFrame = self.compiler.compile(array_value) + pa_table = lazy_frame.collect().limit(n_rows).to_arrow() + # Currently, pyarrow types might not quite be exactly the ones in the bigframes schema. + # Nullability may be different, and might use large versions of list, string datatypes. + return bigframes.session.executor.ExecuteResult( + arrow_batches=pa_table.to_batches(), + schema=array_value.schema, + total_bytes=pa_table.nbytes, + total_rows=pa_table.num_rows, + ) + def execute( self, array_value: bigframes.core.ArrayValue, @@ -58,6 +75,14 @@ def execute( total_rows=pa_table.num_rows, ) + def cached( + self, + array_value: bigframes.core.ArrayValue, + *, + config, + ) -> None: + return + class TestSession(bigframes.session.Session): def __init__(self): @@ -92,3 +117,8 @@ def read_pandas(self, pandas_dataframe, write_engine="default"): pandas_dataframe = pandas_dataframe.to_frame() local_block = bigframes.core.blocks.Block.from_local(pandas_dataframe, self) return bigframes.dataframe.DataFrame(local_block) + + @property + def bqclient(self): + # prevents logger from trying to call bq upon any errors + return None diff --git a/bigframes/version.py b/bigframes/version.py index 6cc3d952ed..e41364d4d1 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.5.0" +__version__ = "2.6.0" # {x-release-please-start-date} -__release_date__ = "2025-05-30" +__release_date__ = "2025-06-09" # {x-release-please-end} diff --git a/noxfile.py b/noxfile.py index 297e8f9d6f..dee5f929b7 100644 --- a/noxfile.py +++ b/noxfile.py @@ -79,7 +79,7 @@ UNIT_TEST_DEPENDENCIES: List[str] = [] UNIT_TEST_EXTRAS: List[str] = ["tests"] UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = { - "3.12": ["polars", "scikit-learn"], + "3.12": ["tests", "polars", "scikit-learn"], } # 3.10 is needed for Windows tests as it is the only version installed in the @@ -108,8 +108,8 @@ SYSTEM_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = { "3.9": ["tests"], "3.10": ["tests"], - "3.12": ["tests", "scikit-learn"], - "3.13": ["tests"], + "3.12": ["tests", "scikit-learn", "polars"], + "3.13": ["tests", "polars"], } LOGGING_NAME_ENV_VAR = "BIGFRAMES_PERFORMANCE_LOG_NAME" @@ -202,14 +202,11 @@ def install_unittest_dependencies(session, install_test_extra, *constraints): if UNIT_TEST_LOCAL_DEPENDENCIES: session.install(*UNIT_TEST_LOCAL_DEPENDENCIES, *constraints) - if install_test_extra and UNIT_TEST_EXTRAS_BY_PYTHON: - extras = UNIT_TEST_EXTRAS_BY_PYTHON.get(session.python, []) - if install_test_extra and UNIT_TEST_EXTRAS: - extras = UNIT_TEST_EXTRAS - else: - extras = [] - - if extras: + if install_test_extra: + if session.python in UNIT_TEST_EXTRAS_BY_PYTHON: + extras = UNIT_TEST_EXTRAS_BY_PYTHON[session.python] + else: + extras = UNIT_TEST_EXTRAS session.install("-e", f".[{','.join(extras)}]", *constraints) else: session.install("-e", ".", *constraints) diff --git a/scripts/data/audio/audio_LJ001-0010.wav b/scripts/data/audio/audio_LJ001-0010.wav new file mode 100644 index 0000000000..01a2e68829 Binary files /dev/null and b/scripts/data/audio/audio_LJ001-0010.wav differ diff --git a/scripts/data/pdfs/pdfs_sample-local-pdf.pdf b/scripts/data/pdfs/pdfs_sample-local-pdf.pdf new file mode 100644 index 0000000000..d162cd6877 Binary files /dev/null and b/scripts/data/pdfs/pdfs_sample-local-pdf.pdf differ diff --git a/scripts/data/pdfs/test-protected.pdf b/scripts/data/pdfs/test-protected.pdf new file mode 100644 index 0000000000..0d8cd28baa Binary files /dev/null and b/scripts/data/pdfs/test-protected.pdf differ diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 824e774dbe..a4bab1bcfe 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -1521,3 +1521,17 @@ def pdf_mm_df( pdf_gcs_path, session: bigframes.Session, bq_connection: str ) -> bpd.DataFrame: return session.from_glob_path(pdf_gcs_path, name="pdf", connection=bq_connection) + + +@pytest.fixture(scope="session") +def audio_gcs_path() -> str: + return "gs://bigframes_blob_test/audio/*" + + +@pytest.fixture(scope="session") +def audio_mm_df( + audio_gcs_path, session: bigframes.Session, bq_connection: str +) -> bpd.DataFrame: + return session.from_glob_path( + audio_gcs_path, name="audio", connection=bq_connection + ) diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py index 3ebded3d29..4a95e4c6d1 100644 --- a/tests/system/large/blob/test_function.py +++ b/tests/system/large/blob/test_function.py @@ -385,3 +385,54 @@ def test_blob_pdf_chunk( check_dtype=False, check_index=False, ) + + +@pytest.mark.parametrize( + "model_name, verbose", + [ + ("gemini-2.0-flash-001", True), + ("gemini-2.0-flash-001", False), + ("gemini-2.0-flash-lite-001", True), + ("gemini-2.0-flash-lite-001", False), + ], +) +def test_blob_transcribe( + audio_mm_df: 
bpd.DataFrame, + model_name: str, + verbose: bool, +): + actual = ( + audio_mm_df["audio"] + .blob.audio_transcribe( + model_name=model_name, + verbose=verbose, + ) + .to_pandas() + ) + + # check relative length + expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" + expected_len = len(expected_text) + + actual_text = "" + if verbose: + actual_text = actual[0]["content"] + else: + actual_text = actual[0] + actual_len = len(actual_text) + + relative_length_tolerance = 0.2 + min_acceptable_len = expected_len * (1 - relative_length_tolerance) + max_acceptable_len = expected_len * (1 + relative_length_tolerance) + assert min_acceptable_len <= actual_len <= max_acceptable_len, ( + f"Item (verbose={verbose}): Transcribed text length {actual_len} is outside the acceptable range " + f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " + f"Expected reference length was {expected_len}. " + ) + + # check for major keywords + major_keywords = ["book", "picture"] + for keyword in major_keywords: + assert ( + keyword.lower() in actual_text.lower() + ), f"Item (verbose={verbose}): Expected keyword '{keyword}' not found in transcribed text. " diff --git a/tests/system/large/operations/test_ai.py b/tests/system/large/operations/test_ai.py index 1b1d3a3376..c0716220b1 100644 --- a/tests/system/large/operations/test_ai.py +++ b/tests/system/large/operations/test_ai.py @@ -398,6 +398,33 @@ def test_map_invalid_model_raise_error(): ) +def test_classify(gemini_flash_model, session): + df = dataframe.DataFrame(data={"creature": ["dog", "rose"]}, session=session) + + with bigframes.option_context( + AI_OP_EXP_OPTION, + True, + THRESHOLD_OPTION, + 10, + ): + actual_result = df.ai.classify( + "{creature}", + gemini_flash_model, + labels=["animal", "plant"], + output_column="result", + ).to_pandas() + + expected_result = pd.DataFrame( + { + "creature": ["dog", "rose"], + "result": ["animal", "plant"], + } + ) + pandas.testing.assert_frame_equal( + actual_result, expected_result, check_index_type=False, check_dtype=False + ) + + @pytest.mark.parametrize( "instruction", [ diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index be517fb5cc..f888fd0364 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -19,10 +19,14 @@ from shapely.geometry import ( # type: ignore GeometryCollection, LineString, + MultiLineString, + MultiPoint, + MultiPolygon, Point, Polygon, ) +from bigframes.bigquery import st_length import bigframes.bigquery as bbq import bigframes.geopandas @@ -59,6 +63,66 @@ def test_geo_st_area(): ) +# Expected length for 1 degree of longitude at the equator is approx 111195.079734 meters +DEG_LNG_EQUATOR_METERS = 111195.07973400292 + + +def test_st_length_various_geometries(session): + input_geometries = [ + Point(0, 0), + LineString([(0, 0), (1, 0)]), + Polygon([(0, 0), (1, 0), (0, 1), (0, 0)]), + MultiPoint([Point(0, 0), Point(1, 1)]), + MultiLineString([LineString([(0, 0), (1, 0)]), LineString([(0, 0), (0, 1)])]), + MultiPolygon( + [ + Polygon([(0, 0), (1, 0), (0, 1), (0, 0)]), + Polygon([(2, 2), (3, 2), (2, 3), (2, 2)]), + ] + ), + GeometryCollection([Point(0, 0), LineString([(0, 0), (1, 0)])]), + GeometryCollection([]), + None, # Represents NULL geography input + GeometryCollection([Point(1, 1), Point(2, 2)]), + ] + geoseries = bigframes.geopandas.GeoSeries(input_geometries, session=session) + + expected_lengths = pd.Series( + [ 
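+            # Values are meters: ST_LENGTH with use_spheroid=False measures on
+            # a sphere, where 1 degree of longitude at the equator is about
+            # DEG_LNG_EQUATOR_METERS.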
+ 0.0, # Point + DEG_LNG_EQUATOR_METERS, # LineString + 0.0, # Polygon + 0.0, # MultiPoint + 2 * DEG_LNG_EQUATOR_METERS, # MultiLineString + 0.0, # MultiPolygon + DEG_LNG_EQUATOR_METERS, # GeometryCollection (Point + LineString) + 0.0, # Empty GeometryCollection + pd.NA, # None input for ST_LENGTH(NULL) is NULL + 0.0, # GeometryCollection (Point + Point) + ], + index=pd.Index(range(10), dtype="Int64"), + dtype="Float64", + ) + + # Test default use_spheroid + result_default = st_length(geoseries).to_pandas() + pd.testing.assert_series_equal( + result_default, + expected_lengths, + rtol=1e-3, + atol=1e-3, # For comparisons involving 0.0 + ) # type: ignore + + # Test explicit use_spheroid=False + result_explicit_false = st_length(geoseries, use_spheroid=False).to_pandas() + pd.testing.assert_series_equal( + result_explicit_false, + expected_lengths, + rtol=1e-3, + atol=1e-3, # For comparisons involving 0.0 + ) # type: ignore + + def test_geo_st_difference_with_geometry_objects(): data1 = [ Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), @@ -354,3 +418,40 @@ def test_geo_st_intersection_with_similar_geometry_objects(): check_exact=False, rtol=0.1, ) + + +def test_geo_st_isclosed(): + bf_gs = bigframes.geopandas.GeoSeries( + [ + Point(0, 0), # Point + LineString([(0, 0), (1, 1)]), # Open LineString + LineString([(0, 0), (1, 1), (0, 1), (0, 0)]), # Closed LineString + Polygon([(0, 0), (1, 1), (0, 1)]), # Open polygon + GeometryCollection(), # Empty GeometryCollection + bigframes.geopandas.GeoSeries.from_wkt(["GEOMETRYCOLLECTION EMPTY"]).iloc[ + 0 + ], # Also empty + None, # Should be filtered out by dropna + ], + index=[0, 1, 2, 3, 4, 5, 6], + ) + bf_result = bbq.st_isclosed(bf_gs).to_pandas() + + # Expected results based on ST_ISCLOSED documentation: + expected_data = [ + True, # Point: True + False, # Open LineString: False + True, # Closed LineString: True + False, # Polygon: False (only True if it's a full polygon) + False, # Empty GeometryCollection: False (An empty GEOGRAPHY isn't closed) + False, # GEOMETRYCOLLECTION EMPTY: False + None, + ] + expected_series = pd.Series(data=expected_data, dtype="boolean") + + pd.testing.assert_series_equal( + bf_result, + expected_series, + # We default to Int64 (nullable) dtype, but pandas defaults to int64 index. + check_index_type=False, + ) diff --git a/tests/system/small/engines/__init__.py b/tests/system/small/engines/__init__.py new file mode 100644 index 0000000000..0a2669d7a2 --- /dev/null +++ b/tests/system/small/engines/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/system/small/engines/conftest.py b/tests/system/small/engines/conftest.py new file mode 100644 index 0000000000..2a72cb2196 --- /dev/null +++ b/tests/system/small/engines/conftest.py @@ -0,0 +1,81 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pathlib
+from typing import Generator
+
+from google.cloud import bigquery
+import pandas as pd
+import pytest
+
+import bigframes
+from bigframes.core import local_data
+from bigframes.session import (
+    direct_gbq_execution,
+    local_scan_executor,
+    polars_executor,
+    semi_executor,
+)
+
+CURRENT_DIR = pathlib.Path(__file__).parent
+DATA_DIR = CURRENT_DIR.parent.parent.parent / "data"
+
+
+@pytest.fixture(scope="module")
+def fake_session() -> Generator[bigframes.Session, None, None]:
+    import bigframes.core.global_session
+
+    # It's a "polars session", but we are bypassing session-provided execution.
+    # We just want a minimal placeholder session without expensive setup.
+    from bigframes.testing import polars_session
+
+    session = polars_session.TestSession()
+    with bigframes.core.global_session._GlobalSessionContext(session):
+        yield session
+
+
+@pytest.fixture(scope="session", params=["pyarrow", "polars", "bq"])
+def engine(request, bigquery_client: bigquery.Client) -> semi_executor.SemiExecutor:
+    if request.param == "pyarrow":
+        return local_scan_executor.LocalScanExecutor()
+    if request.param == "polars":
+        return polars_executor.PolarsExecutor()
+    if request.param == "bq":
+        return direct_gbq_execution.DirectGbqExecutor(bigquery_client)
+    raise ValueError(f"Unrecognized param: {request.param}")
+
+
+@pytest.fixture(scope="module")
+def managed_data_source(
+    scalars_pandas_df_index: pd.DataFrame,
+) -> local_data.ManagedArrowTable:
+    return local_data.ManagedArrowTable.from_pandas(scalars_pandas_df_index)
+
+
+@pytest.fixture(scope="module")
+def zero_row_source() -> local_data.ManagedArrowTable:
+    return local_data.ManagedArrowTable.from_pandas(pd.DataFrame({"a": [], "b": []}))
+
+
+@pytest.fixture(scope="module")
+def nested_data_source(
+    nested_pandas_df: pd.DataFrame,
+) -> local_data.ManagedArrowTable:
+    return local_data.ManagedArrowTable.from_pandas(nested_pandas_df)
+
+
+@pytest.fixture(scope="module")
+def repeated_data_source(
+    repeated_pandas_df: pd.DataFrame,
+) -> local_data.ManagedArrowTable:
+    return local_data.ManagedArrowTable.from_pandas(repeated_pandas_df)
diff --git a/tests/system/small/engines/test_read_local.py b/tests/system/small/engines/test_read_local.py
new file mode 100644
index 0000000000..7bf1316a44
--- /dev/null
+++ b/tests/system/small/engines/test_read_local.py
@@ -0,0 +1,132 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+import bigframes
+from bigframes.core import identifiers, local_data, nodes
+from bigframes.session import polars_executor, semi_executor
+
+pytest.importorskip("polars")
+
+# Polars is used as the reference since it's fast and local. Generally though,
+# prefer the gbq engine where they disagree.
+REFERENCE_ENGINE = polars_executor.PolarsExecutor()
+
+
+def ensure_equivalence(
+    node: nodes.BigFrameNode,
+    engine1: semi_executor.SemiExecutor,
+    engine2: semi_executor.SemiExecutor,
+):
+    e1_result = engine1.execute(node, ordered=True)
+    e2_result = engine2.execute(node, ordered=True)
+    assert e1_result is not None
+    assert e2_result is not None
+    # Schemas might have extra nullity markers; normalize to the node's
+    # expected schema, which should be looser.
+    e1_table = e1_result.to_arrow_table().cast(node.schema.to_pyarrow())
+    e2_table = e2_result.to_arrow_table().cast(node.schema.to_pyarrow())
+    assert e1_table.equals(e2_table), f"{e1_table} is not equal to {e2_table}"
+
+
+def test_engines_read_local(
+    fake_session: bigframes.Session,
+    managed_data_source: local_data.ManagedArrowTable,
+    engine,
+):
+    scan_list = nodes.ScanList.from_items(
+        nodes.ScanItem(identifiers.ColumnId(item.column), item.dtype, item.column)
+        for item in managed_data_source.schema.items
+    )
+    local_node = nodes.ReadLocalNode(
+        managed_data_source, scan_list, fake_session, offsets_col=None
+    )
+    ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
+
+
+def test_engines_read_local_w_offsets(
+    fake_session: bigframes.Session,
+    managed_data_source: local_data.ManagedArrowTable,
+    engine,
+):
+    scan_list = nodes.ScanList.from_items(
+        nodes.ScanItem(identifiers.ColumnId(item.column), item.dtype, item.column)
+        for item in managed_data_source.schema.items
+    )
+    local_node = nodes.ReadLocalNode(
+        managed_data_source,
+        scan_list,
+        fake_session,
+        offsets_col=identifiers.ColumnId("offsets"),
+    )
+    ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
+
+
+def test_engines_read_local_w_col_subset(
+    fake_session: bigframes.Session,
+    managed_data_source: local_data.ManagedArrowTable,
+    engine,
+):
+    scan_list = nodes.ScanList.from_items(
+        nodes.ScanItem(identifiers.ColumnId(item.column), item.dtype, item.column)
+        for item in managed_data_source.schema.items[::-2]
+    )
+    local_node = nodes.ReadLocalNode(
+        managed_data_source, scan_list, fake_session, offsets_col=None
+    )
+    ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
+
+
+def test_engines_read_local_w_zero_row_source(
+    fake_session: bigframes.Session,
+    zero_row_source: local_data.ManagedArrowTable,
+    engine,
+):
+    scan_list = nodes.ScanList.from_items(
+        nodes.ScanItem(identifiers.ColumnId(item.column), item.dtype, item.column)
+        for item in zero_row_source.schema.items
+    )
+    local_node = nodes.ReadLocalNode(
+        zero_row_source, scan_list, fake_session, offsets_col=None
+    )
+    ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
+
+
+def test_engines_read_local_w_nested_source(
+    fake_session: bigframes.Session,
+    nested_data_source: local_data.ManagedArrowTable,
+    engine,
+):
+    scan_list = nodes.ScanList.from_items(
+        nodes.ScanItem(identifiers.ColumnId(item.column), item.dtype, item.column)
+        for item in nested_data_source.schema.items
+    )
+    local_node = nodes.ReadLocalNode(
+        nested_data_source, scan_list, fake_session, offsets_col=None
+    )
+    ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
+
+
+def test_engines_read_local_w_repeated_source(
+    fake_session: bigframes.Session,
+    repeated_data_source: local_data.ManagedArrowTable,
+    
engine, +): + scan_list = nodes.ScanList.from_items( + nodes.ScanItem(identifiers.ColumnId(item.column), item.dtype, item.column) + for item in repeated_data_source.schema.items + ) + local_node = nodes.ReadLocalNode( + repeated_data_source, scan_list, fake_session, offsets_col=None + ) + ensure_equivalence(local_node, REFERENCE_ENGINE, engine) diff --git a/tests/system/small/functions/test_remote_function.py b/tests/system/small/functions/test_remote_function.py index 51e0459014..7fc7caf2fc 100644 --- a/tests/system/small/functions/test_remote_function.py +++ b/tests/system/small/functions/test_remote_function.py @@ -764,6 +764,11 @@ def test_read_gbq_function_runs_existing_udf_array_output(session, routine_id_un """ ), job_config=bigquery.QueryJobConfig(), + location=None, + project=None, + timeout=None, + metrics=None, + query_with_job=True, ) func = session.read_gbq_function(routine_id_unique) @@ -797,6 +802,11 @@ def test_read_gbq_function_runs_existing_udf_2_params_array_output( """ ), job_config=bigquery.QueryJobConfig(), + location=None, + project=None, + timeout=None, + metrics=None, + query_with_job=True, ) func = session.read_gbq_function(routine_id_unique) @@ -832,6 +842,11 @@ def test_read_gbq_function_runs_existing_udf_4_params_array_output( """ ), job_config=bigquery.QueryJobConfig(), + location=None, + project=None, + timeout=None, + metrics=None, + query_with_job=True, ) func = session.read_gbq_function(routine_id_unique) diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index ae99fd6fc2..36dd070ef5 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -96,6 +96,17 @@ def test_geo_area_not_supported(): bf_series.area +def test_geoseries_length_property_not_implemented(session): + gs = bigframes.geopandas.GeoSeries([Point(0, 0)], session=session) + with pytest.raises( + NotImplementedError, + match=re.escape( + "GeoSeries.length is not yet implemented. Please use bigframes.bigquery.st_length(geoseries) instead." 
+ ), + ): + _ = gs.length + + def test_geo_distance_not_supported(): s1 = bigframes.pandas.Series( [ diff --git a/tests/system/small/operations/test_ai.py b/tests/system/small/operations/test_ai.py index 25d411bef8..83aca8b5b1 100644 --- a/tests/system/small/operations/test_ai.py +++ b/tests/system/small/operations/test_ai.py @@ -108,6 +108,65 @@ def test_map(session): ) +def test_classify(session): + df = dataframe.DataFrame({"col": ["A", "B"]}, session=session) + model = FakeGeminiTextGenerator( + dataframe.DataFrame( + { + "result": ["A", "B"], + "full_response": _create_dummy_full_response(2), + }, + session=session, + ), + ) + + with bigframes.option_context( + AI_OP_EXP_OPTION, + True, + THRESHOLD_OPTION, + 50, + ): + result = df.ai.classify( + "classify {col}", model=model, labels=["A", "B"] + ).to_pandas() + + pandas.testing.assert_frame_equal( + result, + pd.DataFrame( + {"col": ["A", "B"], "result": ["A", "B"]}, dtype=dtypes.STRING_DTYPE + ), + check_index_type=False, + ) + + +@pytest.mark.parametrize( + "labels", + [ + pytest.param([], id="empty-label"), + pytest.param(["A", "A", "B"], id="duplicate-labels"), + ], +) +def test_classify_invalid_labels_raise_error(session, labels): + df = dataframe.DataFrame({"col": ["A", "B"]}, session=session) + model = FakeGeminiTextGenerator( + dataframe.DataFrame( + { + "result": ["A", "B"], + "full_response": _create_dummy_full_response(2), + }, + session=session, + ), + ) + + with bigframes.option_context( + AI_OP_EXP_OPTION, + True, + THRESHOLD_OPTION, + 50, + ), pytest.raises(ValueError): + df.ai.classify("classify {col}", model=model, labels=labels) + + def test_join(session): left_df = dataframe.DataFrame({"col_A": ["A"]}, session=session) right_df = dataframe.DataFrame({"col_B": ["B"]}, session=session) diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index 032d93c19d..8801faf657 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -325,6 +325,11 @@ def test_isalpha(weird_strings, weird_strings_pd): ) +@pytest.mark.skipif( + "dev" in pa.__version__, + # b/333484335 pyarrow is inconsistent on the behavior + reason="pyarrow dev version is inconsistent on isdigit behavior.", +) def test_isdigit(weird_strings, weird_strings_pd): pd_result = weird_strings_pd.str.isdigit() bf_result = weird_strings.str.isdigit().to_pandas() diff --git a/tests/system/small/session/test_read_gbq_colab.py b/tests/system/small/session/test_read_gbq_colab.py index a821901e4c..0992a10055 100644 --- a/tests/system/small/session/test_read_gbq_colab.py +++ b/tests/system/small/session/test_read_gbq_colab.py @@ -19,18 +19,22 @@ def test_read_gbq_colab_to_pandas_batches_preserves_order_by(maybe_ordered_session): + # This query should return enough results to be too big to fit in a single + # page from jobs.query. 
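+    # (The LIMIT was removed and state, gender, and year were added to the
+    # GROUP BY so the result spans multiple pages; the loop below checks for
+    # full 100-row batches.)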
executions_before_sql = maybe_ordered_session._metrics.execution_count df = maybe_ordered_session._read_gbq_colab( """ SELECT name, + state, + gender, + year, SUM(number) AS total FROM `bigquery-public-data.usa_names.usa_1910_2013` WHERE state LIKE 'W%' - GROUP BY name + GROUP BY name, state, gender, year ORDER BY total DESC - LIMIT 300 """ ) executions_before_python = maybe_ordered_session._metrics.execution_count @@ -39,12 +43,17 @@ def test_read_gbq_colab_to_pandas_batches_preserves_order_by(maybe_ordered_sessi ) executions_after = maybe_ordered_session._metrics.execution_count - total_rows = 0 + num_batches = 0 for batch in batches: assert batch["total"].is_monotonic_decreasing - total_rows += len(batch.index) + assert len(batch.index) == 100 + num_batches += 1 + + # Only test the first few pages to avoid downloading unnecessary data + # and so we can confirm we have full pages in each batch. + if num_batches >= 3: + break - assert total_rows > 0 assert executions_after == executions_before_python == executions_before_sql + 1 @@ -103,6 +112,9 @@ def test_read_gbq_colab_includes_formatted_scalars(session): # This is not a supported type, but ignored if not referenced. "some_object": object(), } + + # This query should return few enough results to be small enough to fit in a + # single page from jobs.query. df = session._read_gbq_colab( """ SELECT {some_integer} as some_integer, @@ -124,6 +136,7 @@ def test_read_gbq_colab_includes_formatted_scalars(session): "escaped": pandas.Series(["{escaped}"], dtype="string[pyarrow]"), } ), + check_index_type=False, # int64 vs Int64 ) @@ -152,4 +165,8 @@ def test_read_gbq_colab_includes_formatted_bigframes_dataframe( .assign(int64_col=scalars_pandas_df_index["int64_too"]) .reset_index(drop=False)[["int64_col", "rowindex"]] ) - pandas.testing.assert_frame_equal(result, expected) + pandas.testing.assert_frame_equal( + result, + expected, + check_index_type=False, # int64 vs Int64 + ) diff --git a/tests/system/small/test_encryption.py b/tests/system/small/test_encryption.py index 97f44694b0..1ba8ed7e09 100644 --- a/tests/system/small/test_encryption.py +++ b/tests/system/small/test_encryption.py @@ -70,7 +70,7 @@ def test_session_query_job(bq_cmek, session_with_bq_cmek): if not bq_cmek: # pragma: NO COVER pytest.skip("no cmek set for testing") # pragma: NO COVER - _, query_job = session_with_bq_cmek._loader._start_query( + query_job = session_with_bq_cmek._loader._start_query_with_job( "SELECT 123", job_config=bigquery.QueryJobConfig(use_query_cache=False) ) query_job.result() diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py index 9f45c8465b..7643f5701b 100644 --- a/tests/system/small/test_index.py +++ b/tests/system/small/test_index.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import re + import numpy import pandas as pd import pytest @@ -375,7 +377,7 @@ def test_index_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep): ) -def test_index_isin(scalars_df_index, scalars_pandas_df_index): +def test_index_isin_list(scalars_df_index, scalars_pandas_df_index): col_name = "int64_col" bf_series = ( scalars_df_index.set_index(col_name).index.isin([2, 55555, 4]).to_pandas() @@ -389,6 +391,38 @@ def test_index_isin(scalars_df_index, scalars_pandas_df_index): ) +def test_index_isin_bf_series(scalars_df_index, scalars_pandas_df_index, session): + col_name = "int64_col" + bf_series = ( + scalars_df_index.set_index(col_name) + .index.isin(bpd.Series([2, 55555, 4], session=session)) + .to_pandas() + ) + pd_result_array = scalars_pandas_df_index.set_index(col_name).index.isin( + [2, 55555, 4] + ) + pd.testing.assert_index_equal( + pd.Index(pd_result_array).set_names(col_name), + bf_series, + ) + + +def test_index_isin_bf_index(scalars_df_index, scalars_pandas_df_index, session): + col_name = "int64_col" + bf_series = ( + scalars_df_index.set_index(col_name) + .index.isin(bpd.Index([2, 55555, 4], session=session)) + .to_pandas() + ) + pd_result_array = scalars_pandas_df_index.set_index(col_name).index.isin( + [2, 55555, 4] + ) + pd.testing.assert_index_equal( + pd.Index(pd_result_array).set_names(col_name), + bf_series, + ) + + def test_multiindex_name_is_none(session): df = pd.DataFrame( { @@ -426,3 +460,42 @@ def test_multiindex_repr_includes_all_names(session): ) index = session.read_pandas(df).set_index(["A", "B"]).index assert "names=['A', 'B']" in repr(index) + + +def test_index_item(session): + # Test with a single item + bf_idx_single = bpd.Index([42], session=session) + pd_idx_single = pd.Index([42]) + assert bf_idx_single.item() == pd_idx_single.item() + + +def test_index_item_with_multiple(session): + # Test with multiple items + bf_idx_multiple = bpd.Index([1, 2, 3], session=session) + pd_idx_multiple = pd.Index([1, 2, 3]) + + try: + pd_idx_multiple.item() + except ValueError as e: + expected_message = str(e) + else: + raise AssertionError("Expected ValueError from pandas, but didn't get one") + + with pytest.raises(ValueError, match=re.escape(expected_message)): + bf_idx_multiple.item() + + +def test_index_item_with_empty(session): + # Test with an empty Index + bf_idx_empty = bpd.Index([], dtype="Int64", session=session) + pd_idx_empty: pd.Index = pd.Index([], dtype="Int64") + + try: + pd_idx_empty.item() + except ValueError as e: + expected_message = str(e) + else: + raise AssertionError("Expected ValueError from pandas, but didn't get one") + + with pytest.raises(ValueError, match=re.escape(expected_message)): + bf_idx_empty.item() diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 710e1481be..10671720af 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -629,6 +629,18 @@ def test_series_replace_list_scalar(scalars_dfs): ) +def test_series_replace_nans_with_pd_na(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name = "string_col" + bf_result = scalars_df[col_name].replace({pd.NA: "UNKNOWN"}).to_pandas() + pd_result = scalars_pandas_df[col_name].replace({pd.NA: "UNKNOWN"}) + + pd.testing.assert_series_equal( + pd_result, + bf_result, + ) + + @pytest.mark.parametrize( ("replacement_dict",), ( @@ -1368,6 +1380,24 @@ def test_isin_bigframes_values(scalars_dfs, col_name, test_set, session): ) +def test_isin_bigframes_index(scalars_dfs, session): + scalars_df, 
scalars_pandas_df = scalars_dfs + bf_result = ( + scalars_df["string_col"] + .isin(bigframes.pandas.Index(["Hello, World!", "Hi", "こんにちは"], session=session)) + .to_pandas() + ) + pd_result = ( + scalars_pandas_df["string_col"] + .isin(pd.Index(["Hello, World!", "Hi", "こんにちは"])) + .astype("boolean") + ) + pd.testing.assert_series_equal( + pd_result, + bf_result, + ) + + @pytest.mark.parametrize( ( "col_name", @@ -4255,13 +4285,16 @@ def test_apply_lambda(scalars_dfs, col, lambda_): bf_result = bf_col.apply(lambda_, by_row=False).to_pandas() pd_col = scalars_pandas_df[col] - if pd.__version__.startswith("2.2"): + if pd.__version__[:3] in ("2.2", "2.3"): pd_result = pd_col.apply(lambda_, by_row=False) else: pd_result = pd_col.apply(lambda_) # ignore dtype check, which are Int64 and object respectively - assert_series_equal(bf_result, pd_result, check_dtype=False) + # Some columns implicitly convert to floating point. Use check_exact=False to ensure we're "close enough" + assert_series_equal( + bf_result, pd_result, check_dtype=False, check_exact=False, rtol=0.001 + ) @pytest.mark.parametrize( @@ -4345,13 +4378,16 @@ def foo(x): pd_col = scalars_pandas_df["int64_col"] - if pd.__version__.startswith("2.2"): + if pd.__version__[:3] in ("2.2", "2.3"): pd_result = pd_col.apply(foo, by_row=False) else: pd_result = pd_col.apply(foo) # ignore dtype check, which are Int64 and object respectively - assert_series_equal(bf_result, pd_result, check_dtype=False) + # Some columns implicitly convert to floating point. Use check_exact=False to ensure we're "close enough" + assert_series_equal( + bf_result, pd_result, check_dtype=False, check_exact=False, rtol=0.001 + ) @pytest.mark.parametrize( @@ -4606,3 +4642,42 @@ def test_series_to_pandas_dry_run(scalars_df_index): assert isinstance(result, pd.Series) assert len(result) > 0 + + +def test_series_item(session): + # Test with a single item + bf_s_single = bigframes.pandas.Series([42], session=session) + pd_s_single = pd.Series([42]) + assert bf_s_single.item() == pd_s_single.item() + + +def test_series_item_with_multiple(session): + # Test with multiple items + bf_s_multiple = bigframes.pandas.Series([1, 2, 3], session=session) + pd_s_multiple = pd.Series([1, 2, 3]) + + try: + pd_s_multiple.item() + except ValueError as e: + expected_message = str(e) + else: + raise AssertionError("Expected ValueError from pandas, but didn't get one") + + with pytest.raises(ValueError, match=re.escape(expected_message)): + bf_s_multiple.item() + + +def test_series_item_with_empty(session): + # Test with an empty Series + bf_s_empty = bigframes.pandas.Series([], dtype="Int64", session=session) + pd_s_empty = pd.Series([], dtype="Int64") + + try: + pd_s_empty.item() + except ValueError as e: + expected_message = str(e) + else: + raise AssertionError("Expected ValueError from pandas, but didn't get one") + + with pytest.raises(ValueError, match=re.escape(expected_message)): + bf_s_empty.item() diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 6e68a759b4..9febb0da42 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -1320,10 +1320,6 @@ def test_read_csv_for_names_less_than_columns(session, df_and_gcs_csv_for_two_co assert bf_df.shape == pd_df.shape assert bf_df.columns.tolist() == pd_df.columns.tolist() - # BigFrames requires `sort_index()` because BigQuery doesn't preserve row IDs - # (b/280889935) or guarantee row ordering. 
- bf_df = bf_df.sort_index() - # Pandas's index name is None, while BigFrames's index name is "rowindex". pd_df.index.name = "rowindex" pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @@ -1479,41 +1475,70 @@ def test_read_csv_for_gcs_file_w_header(session, df_and_gcs_csv, header): def test_read_csv_w_usecols(session, df_and_local_csv): # Compares results for pandas and bigframes engines scalars_df, path = df_and_local_csv + usecols = ["rowindex", "bool_col"] with open(path, "rb") as buffer: bf_df = session.read_csv( buffer, engine="bigquery", - usecols=["bool_col"], + usecols=usecols, ) with open(path, "rb") as buffer: # Convert default pandas dtypes to match BigQuery DataFrames dtypes. pd_df = session.read_csv( buffer, - usecols=["bool_col"], + usecols=usecols, dtype=scalars_df[["bool_col"]].dtypes.to_dict(), ) - # Cannot compare two dataframe due to b/408499371. - assert len(bf_df.columns) == 1 - assert len(pd_df.columns) == 1 + assert bf_df.shape == pd_df.shape + assert bf_df.columns.tolist() == pd_df.columns.tolist() + # BigFrames requires `sort_index()` because BigQuery doesn't preserve row IDs + # (b/280889935) or guarantee row ordering. + bf_df = bf_df.set_index("rowindex").sort_index() + pd_df = pd_df.set_index("rowindex") + pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) -@pytest.mark.parametrize( - "engine", - [ - pytest.param("bigquery", id="bq_engine"), - pytest.param(None, id="default_engine"), - ], -) -def test_read_csv_local_w_usecols(session, scalars_pandas_df_index, engine): - with tempfile.TemporaryDirectory() as dir: - path = dir + "/test_read_csv_local_w_usecols.csv" - # Using the pandas to_csv method because the BQ one does not support local write. - scalars_pandas_df_index.to_csv(path, index=False) - # df should only have 1 column which is bool_col. - df = session.read_csv(path, usecols=["bool_col"], engine=engine) - assert len(df.columns) == 1 +def test_read_csv_w_usecols_and_indexcol(session, df_and_local_csv): + # Compares results for pandas and bigframes engines + scalars_df, path = df_and_local_csv + usecols = ["rowindex", "bool_col"] + with open(path, "rb") as buffer: + bf_df = session.read_csv( + buffer, + engine="bigquery", + usecols=usecols, + index_col="rowindex", + ) + with open(path, "rb") as buffer: + # Convert default pandas dtypes to match BigQuery DataFrames dtypes. + pd_df = session.read_csv( + buffer, + usecols=usecols, + index_col="rowindex", + dtype=scalars_df[["bool_col"]].dtypes.to_dict(), + ) + + assert bf_df.shape == pd_df.shape + assert bf_df.columns.tolist() == pd_df.columns.tolist() + + pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + + +def test_read_csv_w_indexcol_not_in_usecols(session, df_and_local_csv): + _, path = df_and_local_csv + with open(path, "rb") as buffer: + with pytest.raises( + ValueError, + match=re.escape("The specified index column(s) were not found"), + ): + session.read_csv( + buffer, + engine="bigquery", + usecols=["bool_col"], + index_col="rowindex", + ) @pytest.mark.parametrize( @@ -1553,9 +1578,6 @@ def test_read_csv_local_w_encoding(session, penguins_pandas_df_default_index): bf_df = session.read_csv( path, engine="bigquery", index_col="rowindex", encoding="ISO-8859-1" ) - # BigFrames requires `sort_index()` because BigQuery doesn't preserve row IDs - # (b/280889935) or guarantee row ordering. 
- bf_df = bf_df.sort_index() pd.testing.assert_frame_equal( bf_df.to_pandas(), penguins_pandas_df_default_index ) diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal/out.sql index d7e47b6032..a34f3526d6 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal/out.sql @@ -7,7 +7,7 @@ WITH `bfcte_0` AS ( CAST(b'Hello, World!' AS BYTES), CAST('2021-07-21' AS DATE), CAST('2021-07-21T11:39:45' AS DATETIME), - ST_GEOGFROMTEXT('POINT (-122.0838511 37.3860517)'), + ST_GEOGFROMTEXT('POINT(-122.0838511 37.3860517)'), 123456789, 0, CAST(1.234567890 AS NUMERIC), @@ -24,7 +24,7 @@ WITH `bfcte_0` AS ( CAST(b'\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf' AS BYTES), CAST('1991-02-03' AS DATE), CAST('1991-01-02T03:45:06' AS DATETIME), - ST_GEOGFROMTEXT('POINT (-71.104 42.315)'), + ST_GEOGFROMTEXT('POINT(-71.104 42.315)'), -987654321, 1, CAST(1.234567890 AS NUMERIC), @@ -41,7 +41,7 @@ WITH `bfcte_0` AS ( CAST(b'\xc2\xa1Hola Mundo!' AS BYTES), CAST('2023-03-01' AS DATE), CAST('2023-03-01T10:55:13' AS DATETIME), - ST_GEOGFROMTEXT('POINT (-0.124474760143016 51.5007826749545)'), + ST_GEOGFROMTEXT('POINT(-0.124474760143016 51.5007826749545)'), 314159, 0, CAST(101.101010100 AS NUMERIC), @@ -109,7 +109,7 @@ WITH `bfcte_0` AS ( CAST(b'Hello\tBigFrames!\x07' AS BYTES), CAST('2023-05-23' AS DATE), CAST('2023-05-23T11:37:01' AS DATETIME), - ST_GEOGFROMTEXT('LINESTRING (-0.127959 51.507728, -0.127026 51.507473)'), + ST_GEOGFROMTEXT('LINESTRING(-0.127959 51.507728, -0.127026 51.507473)'), 101202303, 2, CAST(-10.090807000 AS NUMERIC), diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py index e5e2c58d59..cfee5ea98d 100644 --- a/tests/unit/session/test_io_bigquery.py +++ b/tests/unit/session/test_io_bigquery.py @@ -14,7 +14,7 @@ import datetime import re -from typing import Iterable +from typing import Iterable, Optional from unittest import mock import google.cloud.bigquery as bigquery @@ -203,7 +203,7 @@ def test_add_and_trim_labels_length_limit_met(): [(None, None), (30.0, "test_api")], ) def test_start_query_with_client_labels_length_limit_met( - mock_bq_client, timeout, api_name + mock_bq_client: bigquery.Client, timeout: Optional[float], api_name ): sql = "select * from abc" cur_labels = { @@ -229,8 +229,12 @@ def test_start_query_with_client_labels_length_limit_met( io_bq.start_query_with_client( mock_bq_client, sql, - job_config, + job_config=job_config, + location=None, + project=None, timeout=timeout, + metrics=None, + query_with_job=True, ) assert job_config.labels is not None diff --git a/tests/unit/session/test_local_scan_executor.py b/tests/unit/session/test_local_scan_executor.py new file mode 100644 index 0000000000..30b1b5f78d --- /dev/null +++ b/tests/unit/session/test_local_scan_executor.py @@ -0,0 +1,105 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import pyarrow +import pytest + +from bigframes import dtypes +from bigframes.core import identifiers, local_data, nodes +from bigframes.session import local_scan_executor +from bigframes.testing import mocks + + +@pytest.fixture +def object_under_test(): + return local_scan_executor.LocalScanExecutor() + + +def create_read_local_node(arrow_table: pyarrow.Table): + session = mocks.create_bigquery_session() + local_data_source = local_data.ManagedArrowTable.from_pyarrow(arrow_table) + return nodes.ReadLocalNode( + local_data_source=local_data_source, + session=session, + scan_list=nodes.ScanList( + items=tuple( + nodes.ScanItem( + id=identifiers.ColumnId(column_name), + dtype=dtypes.arrow_dtype_to_bigframes_dtype( + arrow_table.field(column_name).type + ), + source_id=column_name, + ) + for column_name in arrow_table.column_names + ), + ), + ) + + +@pytest.mark.parametrize( + ("start", "stop", "expected_rows"), + ( + # No-op slices. + (None, None, 10), + (0, None, 10), + (None, 10, 10), + # Slices equivalent to limits. + (None, 7, 7), + (0, 3, 3), + ), +) +def test_local_scan_executor_with_slice(start, stop, expected_rows, object_under_test): + pyarrow_table = pyarrow.Table.from_pydict( + { + "rowindex": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "letters": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + } + ) + assert pyarrow_table.num_rows == 10 + + local_node = create_read_local_node(pyarrow_table) + plan = nodes.SliceNode( + child=local_node, + start=start, + stop=stop, + ) + + result = object_under_test.execute(plan, ordered=True) + result_table = pyarrow.Table.from_batches(result.arrow_batches) + assert result_table.num_rows == expected_rows + + +@pytest.mark.parametrize( + ("start", "stop", "step"), + ( + (-1, None, 1), + (None, -1, 1), + (None, None, 2), + (None, None, -1), + (4, None, 6), + (1, 9, 8), + ), +) +def test_local_scan_executor_with_slice_unsupported_inputs( + start, stop, step, object_under_test +): + local_node = create_read_local_node(pyarrow.Table.from_pydict({"col": [1, 2, 3]})) + plan = nodes.SliceNode( + child=local_node, + start=start, + stop=stop, + step=step, + ) + assert object_under_test.execute(plan, ordered=True) is None diff --git a/tests/unit/session/test_read_gbq_colab.py b/tests/unit/session/test_read_gbq_colab.py index cffc6b3af7..c4635f85a9 100644 --- a/tests/unit/session/test_read_gbq_colab.py +++ b/tests/unit/session/test_read_gbq_colab.py @@ -80,3 +80,19 @@ def test_read_gbq_colab_includes_formatted_values_in_dry_run(monkeypatch): assert config.dry_run assert query.strip() == expected.strip() + + +def test_read_gbq_colab_doesnt_set_destination_table(): + """For best performance, we don't try to workaround the 10 GB query results limitation.""" + session = mocks.create_bigquery_session() + + _ = session._read_gbq_colab("SELECT 'my-test-query';") + queries = session._queries # type: ignore + configs = session._job_configs # type: ignore + + for query, config in zip(queries, configs): + if query == "SELECT 'my-test-query';" and not config.dry_run: + break + + assert query == "SELECT 'my-test-query';" + 
assert config.destination is None diff --git a/tests/unit/session/test_read_gbq_query.py b/tests/unit/session/test_read_gbq_query.py new file mode 100644 index 0000000000..afd9922426 --- /dev/null +++ b/tests/unit/session/test_read_gbq_query.py @@ -0,0 +1,37 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for read_gbq_query functions.""" + +from bigframes.testing import mocks + + +def test_read_gbq_query_sets_destination_table(): + """Workaround the 10 GB query results limitation by setting a destination table. + + See internal issue b/303057336. + """ + # Use partial ordering mode to skip column uniqueness checks. + session = mocks.create_bigquery_session(ordering_mode="partial") + + _ = session.read_gbq_query("SELECT 'my-test-query';") + queries = session._queries # type: ignore + configs = session._job_configs # type: ignore + + for query, config in zip(queries, configs): + if query == "SELECT 'my-test-query';" and not config.dry_run: + break + + assert query == "SELECT 'my-test-query';" + assert config.destination is not None diff --git a/tests/unit/session/test_read_gbq_table.py b/tests/unit/session/test_read_gbq_table.py index 6a4ae7cb60..0c67e05813 100644 --- a/tests/unit/session/test_read_gbq_table.py +++ b/tests/unit/session/test_read_gbq_table.py @@ -81,14 +81,17 @@ def test_infer_unique_columns(index_cols, primary_keys, values_distinct, expecte } bqclient = mock.create_autospec(google.cloud.bigquery.Client, instance=True) bqclient.project = "test-project" - bqclient.get_table.return_value = table + session = mocks.create_bigquery_session( + bqclient=bqclient, table_schema=table.schema + ) + # Mock bqclient _after_ creating session to override its mocks. + bqclient.get_table.return_value = table + bqclient.query_and_wait.side_effect = None bqclient.query_and_wait.return_value = ( {"total_count": 3, "distinct_count": 3 if values_distinct else 2}, ) - session = mocks.create_bigquery_session( - bqclient=bqclient, table_schema=table.schema - ) + table._properties["location"] = session._location result = bf_read_gbq_table.infer_unique_columns(bqclient, table, index_cols) diff --git a/tests/unit/session/test_session.py b/tests/unit/session/test_session.py index cbd31f588a..26b74a3f8a 100644 --- a/tests/unit/session/test_session.py +++ b/tests/unit/session/test_session.py @@ -273,7 +273,11 @@ def test_default_index_warning_raised_by_read_gbq(table): bqclient.project = "test-project" bqclient.get_table.return_value = table bqclient.query_and_wait.return_value = ({"total_count": 3, "distinct_count": 2},) - session = mocks.create_bigquery_session(bqclient=bqclient) + session = mocks.create_bigquery_session( + bqclient=bqclient, + # DefaultIndexWarning is only relevant for strict mode. 
+ ordering_mode="strict", + ) table._properties["location"] = session._location with pytest.warns(bigframes.exceptions.DefaultIndexWarning): @@ -296,7 +300,11 @@ def test_default_index_warning_not_raised_by_read_gbq_index_col_sequential_int64 bqclient.project = "test-project" bqclient.get_table.return_value = table bqclient.query_and_wait.return_value = ({"total_count": 4, "distinct_count": 3},) - session = mocks.create_bigquery_session(bqclient=bqclient) + session = mocks.create_bigquery_session( + bqclient=bqclient, + # DefaultIndexWarning is only relevant for strict mode. + ordering_mode="strict", + ) table._properties["location"] = session._location # No warnings raised because we set the option allowing the default indexes. @@ -344,7 +352,10 @@ def test_default_index_warning_not_raised_by_read_gbq_index_col_columns( {"total_count": total_count, "distinct_count": distinct_count}, ) session = mocks.create_bigquery_session( - bqclient=bqclient, table_schema=table.schema + bqclient=bqclient, + table_schema=table.schema, + # DefaultIndexWarning is only relevant for strict mode. + ordering_mode="strict", ) table._properties["location"] = session._location @@ -386,7 +397,10 @@ def test_default_index_warning_not_raised_by_read_gbq_primary_key(table): bqclient.project = "test-project" bqclient.get_table.return_value = table session = mocks.create_bigquery_session( - bqclient=bqclient, table_schema=table.schema + bqclient=bqclient, + table_schema=table.schema, + # DefaultIndexWarning is only relevant for strict mode. + ordering_mode="strict", ) table._properties["location"] = session._location diff --git a/tests/unit/test_dataframe_polars.py b/tests/unit/test_dataframe_polars.py new file mode 100644 index 0000000000..2bda563418 --- /dev/null +++ b/tests/unit/test_dataframe_polars.py @@ -0,0 +1,4422 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
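+
+# These tests exercise the DataFrame API against the local polars-backed
+# TestSession (see the module-scoped `session` fixture below), comparing
+# results with pandas without issuing any BigQuery queries.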
+ +import io +import operator +import pathlib +import tempfile +import typing +from typing import Generator, List, Tuple + +import numpy as np +import pandas as pd +import pandas.testing +import pytest + +import bigframes +import bigframes._config.display_options as display_options +import bigframes.core.indexes as bf_indexes +import bigframes.dataframe as dataframe +import bigframes.pandas as bpd +import bigframes.series as series +from tests.system.utils import ( + assert_dfs_equivalent, + assert_pandas_df_equal, + assert_series_equal, + assert_series_equivalent, + convert_pandas_dtypes, +) + +pytest.importorskip("polars") +pytest.importorskip("pandas", minversion="2.0.0") + +CURRENT_DIR = pathlib.Path(__file__).parent +DATA_DIR = CURRENT_DIR.parent / "data" + + +@pytest.fixture(scope="module", autouse=True) +def session() -> Generator[bigframes.Session, None, None]: + import bigframes.core.global_session + from bigframes.testing import polars_session + + session = polars_session.TestSession() + with bigframes.core.global_session._GlobalSessionContext(session): + yield session + + +@pytest.fixture(scope="module") +def scalars_pandas_df_index() -> pd.DataFrame: + """pd.DataFrame pointing at test data.""" + + df = pd.read_json( + DATA_DIR / "scalars.jsonl", + lines=True, + ) + convert_pandas_dtypes(df, bytes_col=True) + + df = df.set_index("rowindex", drop=False) + df.index.name = None + return df.set_index("rowindex").sort_index() + + +@pytest.fixture(scope="module") +def scalars_df_index( + session: bigframes.Session, scalars_pandas_df_index +) -> bpd.DataFrame: + return session.read_pandas(scalars_pandas_df_index) + + +@pytest.fixture(scope="module") +def scalars_df_2_index( + session: bigframes.Session, scalars_pandas_df_index +) -> bpd.DataFrame: + return session.read_pandas(scalars_pandas_df_index) + + +@pytest.fixture(scope="module") +def scalars_dfs( + scalars_df_index, + scalars_pandas_df_index, +): + return scalars_df_index, scalars_pandas_df_index + + +def test_df_construct_copy(scalars_dfs): + columns = ["int64_col", "string_col", "float64_col"] + scalars_df, scalars_pandas_df = scalars_dfs + # Make the mapping from label to col_id non-trivial + bf_df = scalars_df.copy() + bf_df["int64_col"] = bf_df["int64_col"] / 2 + pd_df = scalars_pandas_df.copy() + pd_df["int64_col"] = pd_df["int64_col"] / 2 + + bf_result = dataframe.DataFrame(bf_df, columns=columns).to_pandas() + + pd_result = pd.DataFrame(pd_df, columns=columns) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_construct_pandas_default(scalars_dfs): + # This should trigger the inlined codepath + columns = [ + "int64_too", + "int64_col", + "float64_col", + "bool_col", + "string_col", + "date_col", + "datetime_col", + "numeric_col", + "float64_col", + "time_col", + "timestamp_col", + ] + _, scalars_pandas_df = scalars_dfs + bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns).to_pandas() + pd_result = pd.DataFrame(scalars_pandas_df, columns=columns) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_construct_structs(session): + pd_frame = pd.Series( + [ + {"version": 1, "project": "pandas"}, + {"version": 2, "project": "pandas"}, + {"version": 1, "project": "numpy"}, + ] + ).to_frame() + bf_series = session.read_pandas(pd_frame) + pd.testing.assert_frame_equal( + bf_series.to_pandas(), pd_frame, check_index_type=False, check_dtype=False + ) + + +def test_df_construct_pandas_set_dtype(scalars_dfs): + columns = [ + "int64_too", + "int64_col", + "float64_col", + 
"bool_col", + ] + _, scalars_pandas_df = scalars_dfs + bf_result = dataframe.DataFrame( + scalars_pandas_df, columns=columns, dtype="Float64" + ).to_pandas() + pd_result = pd.DataFrame(scalars_pandas_df, columns=columns, dtype="Float64") + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_construct_from_series(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = dataframe.DataFrame( + {"a": scalars_df["int64_col"], "b": scalars_df["string_col"]}, + dtype="string[pyarrow]", + ) + pd_result = pd.DataFrame( + {"a": scalars_pandas_df["int64_col"], "b": scalars_pandas_df["string_col"]}, + dtype="string[pyarrow]", + ) + assert_dfs_equivalent(pd_result, bf_result) + + +def test_df_construct_from_dict(): + input_dict = { + "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"], + # With a space in column name. We use standardized SQL schema ids to solve the problem that BQ schema doesn't support column names with spaces. b/296751058 + "Max Speed": [380.0, 370.0, 24.0, 26.0], + } + bf_result = dataframe.DataFrame(input_dict).to_pandas() + pd_result = pd.DataFrame(input_dict) + + pandas.testing.assert_frame_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + +def test_df_construct_dtype(): + data = { + "int_col": [1, 2, 3], + "string_col": ["1.1", "2.0", "3.5"], + "float_col": [1.0, 2.0, 3.0], + } + dtype = pd.StringDtype(storage="pyarrow") + bf_result = dataframe.DataFrame(data, dtype=dtype) + pd_result = pd.DataFrame(data, dtype=dtype) + pd_result.index = pd_result.index.astype("Int64") + pandas.testing.assert_frame_equal(bf_result.to_pandas(), pd_result) + + +def test_get_column(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name = "int64_col" + series = scalars_df[col_name] + bf_result = series.to_pandas() + pd_result = scalars_pandas_df[col_name] + assert_series_equal(bf_result, pd_result) + + +def test_get_column_nonstring(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + series = scalars_df.rename(columns={"int64_col": 123.1})[123.1] + bf_result = series.to_pandas() + pd_result = scalars_pandas_df.rename(columns={"int64_col": 123.1})[123.1] + assert_series_equal(bf_result, pd_result) + + +@pytest.mark.parametrize( + "row_slice", + [ + (slice(1, 7, 2)), + (slice(1, 7, None)), + (slice(None, -3, None)), + ], +) +def test_get_rows_with_slice(scalars_dfs, row_slice): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[row_slice].to_pandas() + pd_result = scalars_pandas_df[row_slice] + assert_pandas_df_equal(bf_result, pd_result) + + +def test_hasattr(scalars_dfs): + scalars_df, _ = scalars_dfs + assert hasattr(scalars_df, "int64_col") + assert hasattr(scalars_df, "head") + assert not hasattr(scalars_df, "not_exist") + + +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_head_with_custom_column_labels( + scalars_df_index, scalars_pandas_df_index, ordered +): + rename_mapping = { + "int64_col": "Integer Column", + "string_col": "言語列", + } + bf_df = scalars_df_index.rename(columns=rename_mapping).head(3) + bf_result = bf_df.to_pandas(ordered=ordered) + pd_result = scalars_pandas_df_index.rename(columns=rename_mapping).head(3) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered) + + +def test_tail_with_custom_column_labels(scalars_df_index, scalars_pandas_df_index): + rename_mapping = { + "int64_col": "Integer Column", + "string_col": "言語列", + } + bf_df = scalars_df_index.rename(columns=rename_mapping).tail(3) + bf_result = 
bf_df.to_pandas() + pd_result = scalars_pandas_df_index.rename(columns=rename_mapping).tail(3) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_get_column_by_attr(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + series = scalars_df.int64_col + bf_result = series.to_pandas() + pd_result = scalars_pandas_df.int64_col + assert_series_equal(bf_result, pd_result) + + +def test_get_columns(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_names = ["bool_col", "float64_col", "int64_col"] + df_subset = scalars_df.get(col_names) + df_pandas = df_subset.to_pandas() + pd.testing.assert_index_equal( + df_pandas.columns, scalars_pandas_df[col_names].columns + ) + + +def test_get_columns_default(scalars_dfs): + scalars_df, _ = scalars_dfs + col_names = ["not", "column", "names"] + result = scalars_df.get(col_names, "default_val") + assert result == "default_val" + + +@pytest.mark.parametrize( + ("loc", "column", "value", "allow_duplicates"), + [ + (0, 666, 2, False), + (5, "float64_col", 2.2, True), + (13, "rowindex_2", [8, 7, 6, 5, 4, 3, 2, 1, 0], True), + pytest.param( + 14, + "test", + 2, + False, + marks=pytest.mark.xfail( + raises=IndexError, + ), + ), + pytest.param( + 12, + "int64_col", + 2, + False, + marks=pytest.mark.xfail( + raises=ValueError, + ), + ), + ], +) +def test_insert(scalars_dfs, loc, column, value, allow_duplicates): + scalars_df, scalars_pandas_df = scalars_dfs + # insert works inplace, so will influence other tests. + # make a copy to avoid inplace changes. + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + bf_df.insert(loc, column, value, allow_duplicates) + pd_df.insert(loc, column, value, allow_duplicates) + + pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df, check_dtype=False) + + +def test_where_series_cond(scalars_df_index, scalars_pandas_df_index): + # Condition is dataframe, other is None (as default). + cond_bf = scalars_df_index["int64_col"] > 0 + cond_pd = scalars_pandas_df_index["int64_col"] > 0 + bf_result = scalars_df_index.where(cond_bf).to_pandas() + pd_result = scalars_pandas_df_index.where(cond_pd) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_mask_series_cond(scalars_df_index, scalars_pandas_df_index): + cond_bf = scalars_df_index["int64_col"] > 0 + cond_pd = scalars_pandas_df_index["int64_col"] > 0 + + bf_df = scalars_df_index[["int64_too", "int64_col", "float64_col"]] + pd_df = scalars_pandas_df_index[["int64_too", "int64_col", "float64_col"]] + bf_result = bf_df.mask(cond_bf, bf_df + 1).to_pandas() + pd_result = pd_df.mask(cond_pd, pd_df + 1) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_where_series_multi_index(scalars_df_index, scalars_pandas_df_index): + # Test when a dataframe has multi-index or multi-columns. + columns = ["int64_col", "float64_col"] + dataframe_bf = scalars_df_index[columns] + + dataframe_bf.columns = pd.MultiIndex.from_tuples( + [("str1", 1), ("str2", 2)], names=["STR", "INT"] + ) + cond_bf = dataframe_bf["str1"] > 0 + + with pytest.raises(NotImplementedError) as context: + dataframe_bf.where(cond_bf).to_pandas() + assert ( + str(context.value) + == "The dataframe.where() method does not support multi-index and/or multi-column." + ) + + +def test_where_series_cond_const_other(scalars_df_index, scalars_pandas_df_index): + # Condition is a series, other is a constant. 
+ columns = ["int64_col", "float64_col"] + dataframe_bf = scalars_df_index[columns] + dataframe_pd = scalars_pandas_df_index[columns] + dataframe_bf.columns.name = "test_name" + dataframe_pd.columns.name = "test_name" + + cond_bf = dataframe_bf["int64_col"] > 0 + cond_pd = dataframe_pd["int64_col"] > 0 + other = 0 + + bf_result = dataframe_bf.where(cond_bf, other).to_pandas() + pd_result = dataframe_pd.where(cond_pd, other) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_where_series_cond_dataframe_other(scalars_df_index, scalars_pandas_df_index): + # Condition is a series, other is a dataframe. + columns = ["int64_col", "float64_col"] + dataframe_bf = scalars_df_index[columns] + dataframe_pd = scalars_pandas_df_index[columns] + + cond_bf = dataframe_bf["int64_col"] > 0 + cond_pd = dataframe_pd["int64_col"] > 0 + other_bf = -dataframe_bf + other_pd = -dataframe_pd + + bf_result = dataframe_bf.where(cond_bf, other_bf).to_pandas() + pd_result = dataframe_pd.where(cond_pd, other_pd) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_where_dataframe_cond(scalars_df_index, scalars_pandas_df_index): + # Condition is a dataframe, other is None. + columns = ["int64_col", "float64_col"] + dataframe_bf = scalars_df_index[columns] + dataframe_pd = scalars_pandas_df_index[columns] + + cond_bf = dataframe_bf > 0 + cond_pd = dataframe_pd > 0 + + bf_result = dataframe_bf.where(cond_bf, None).to_pandas() + pd_result = dataframe_pd.where(cond_pd, None) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_where_dataframe_cond_const_other(scalars_df_index, scalars_pandas_df_index): + # Condition is a dataframe, other is a constant. + columns = ["int64_col", "float64_col"] + dataframe_bf = scalars_df_index[columns] + dataframe_pd = scalars_pandas_df_index[columns] + + cond_bf = dataframe_bf > 0 + cond_pd = dataframe_pd > 0 + other_bf = 10 + other_pd = 10 + + bf_result = dataframe_bf.where(cond_bf, other_bf).to_pandas() + pd_result = dataframe_pd.where(cond_pd, other_pd) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_where_dataframe_cond_dataframe_other( + scalars_df_index, scalars_pandas_df_index +): + # Condition is a dataframe, other is a dataframe. 
+ columns = ["int64_col", "float64_col"] + dataframe_bf = scalars_df_index[columns] + dataframe_pd = scalars_pandas_df_index[columns] + + cond_bf = dataframe_bf > 0 + cond_pd = dataframe_pd > 0 + other_bf = dataframe_bf * 2 + other_pd = dataframe_pd * 2 + + bf_result = dataframe_bf.where(cond_bf, other_bf).to_pandas() + pd_result = dataframe_pd.where(cond_pd, other_pd) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_drop_column(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name = "int64_col" + df_pandas = scalars_df.drop(columns=col_name).to_pandas() + pd.testing.assert_index_equal( + df_pandas.columns, scalars_pandas_df.drop(columns=col_name).columns + ) + + +def test_drop_columns(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_names = ["int64_col", "geography_col", "time_col"] + df_pandas = scalars_df.drop(columns=col_names).to_pandas() + pd.testing.assert_index_equal( + df_pandas.columns, scalars_pandas_df.drop(columns=col_names).columns + ) + + +def test_drop_labels_axis_1(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + labels = ["int64_col", "geography_col", "time_col"] + + pd_result = scalars_pandas_df.drop(labels=labels, axis=1) + bf_result = scalars_df.drop(labels=labels, axis=1).to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result) + + +def test_drop_with_custom_column_labels(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + rename_mapping = { + "int64_col": "Integer Column", + "string_col": "言語列", + } + dropped_columns = [ + "言語列", + "timestamp_col", + ] + bf_df = scalars_df.rename(columns=rename_mapping).drop(columns=dropped_columns) + bf_result = bf_df.to_pandas() + pd_result = scalars_pandas_df.rename(columns=rename_mapping).drop( + columns=dropped_columns + ) + assert_pandas_df_equal(bf_result, pd_result) + + +def test_df_memory_usage(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + pd_result = scalars_pandas_df.memory_usage() + bf_result = scalars_df.memory_usage() + + pd.testing.assert_series_equal(pd_result, bf_result, rtol=1.5) + + +def test_df_info(scalars_dfs): + expected = ( + "\n" + "Index: 9 entries, 0 to 8\n" + "Data columns (total 13 columns):\n" + " # Column Non-Null Count Dtype\n" + "--- ------------- ---------------- ------------------------------\n" + " 0 bool_col 8 non-null boolean\n" + " 1 bytes_col 6 non-null binary[pyarrow]\n" + " 2 date_col 7 non-null date32[day][pyarrow]\n" + " 3 datetime_col 6 non-null timestamp[us][pyarrow]\n" + " 4 geography_col 4 non-null geometry\n" + " 5 int64_col 8 non-null Int64\n" + " 6 int64_too 9 non-null Int64\n" + " 7 numeric_col 6 non-null decimal128(38, 9)[pyarrow]\n" + " 8 float64_col 7 non-null Float64\n" + " 9 rowindex_2 9 non-null Int64\n" + " 10 string_col 8 non-null string\n" + " 11 time_col 6 non-null time64[us][pyarrow]\n" + " 12 timestamp_col 6 non-null timestamp[us, tz=UTC][pyarrow]\n" + "dtypes: Float64(1), Int64(3), binary[pyarrow](1), boolean(1), date32[day][pyarrow](1), decimal128(38, 9)[pyarrow](1), geometry(1), string(1), time64[us][pyarrow](1), timestamp[us, tz=UTC][pyarrow](1), timestamp[us][pyarrow](1)\n" + "memory usage: 1269 bytes\n" + ) + + scalars_df, _ = scalars_dfs + bf_result = io.StringIO() + + scalars_df.info(buf=bf_result) + + assert expected == bf_result.getvalue() + + +@pytest.mark.parametrize( + ("include", "exclude"), + [ + ("Int64", None), + (["int"], None), + ("number", None), + ([pd.Int64Dtype(), pd.BooleanDtype()], None), + (None, [pd.Int64Dtype(), pd.BooleanDtype()]), + 
("Int64", ["boolean"]), + ], +) +def test_select_dtypes(scalars_dfs, include, exclude): + scalars_df, scalars_pandas_df = scalars_dfs + + pd_result = scalars_pandas_df.select_dtypes(include=include, exclude=exclude) + bf_result = scalars_df.select_dtypes(include=include, exclude=exclude).to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result) + + +def test_drop_index(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + pd_result = scalars_pandas_df.drop(index=[4, 1, 2]) + bf_result = scalars_df.drop(index=[4, 1, 2]).to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result) + + +def test_drop_pandas_index(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + drop_index = scalars_pandas_df.iloc[[4, 1, 2]].index + + pd_result = scalars_pandas_df.drop(index=drop_index) + bf_result = scalars_df.drop(index=drop_index).to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result) + + +def test_drop_bigframes_index(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + drop_index = scalars_df.loc[[4, 1, 2]].index + drop_pandas_index = scalars_pandas_df.loc[[4, 1, 2]].index + + pd_result = scalars_pandas_df.drop(index=drop_pandas_index) + bf_result = scalars_df.drop(index=drop_index).to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result) + + +def test_drop_bigframes_index_with_na(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + scalars_df = scalars_df.copy() + scalars_pandas_df = scalars_pandas_df.copy() + scalars_df = scalars_df.set_index("bytes_col") + scalars_pandas_df = scalars_pandas_df.set_index("bytes_col") + drop_index = scalars_df.iloc[[3, 5]].index + drop_pandas_index = scalars_pandas_df.iloc[[3, 5]].index + + pd_result = scalars_pandas_df.drop(index=drop_pandas_index) # drop_pandas_index) + bf_result = scalars_df.drop(index=drop_index).to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result) + + +def test_drop_bigframes_multiindex(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") + scalars_df, scalars_pandas_df = scalars_dfs + scalars_df = scalars_df.copy() + scalars_pandas_df = scalars_pandas_df.copy() + sub_df = scalars_df.iloc[[4, 1, 2]] + sub_pandas_df = scalars_pandas_df.iloc[[4, 1, 2]] + sub_df = sub_df.set_index(["bytes_col", "numeric_col"]) + sub_pandas_df = sub_pandas_df.set_index(["bytes_col", "numeric_col"]) + drop_index = sub_df.index + drop_pandas_index = sub_pandas_df.index + + scalars_df = scalars_df.set_index(["bytes_col", "numeric_col"]) + scalars_pandas_df = scalars_pandas_df.set_index(["bytes_col", "numeric_col"]) + bf_result = scalars_df.drop(index=drop_index).to_pandas() + pd_result = scalars_pandas_df.drop(index=drop_pandas_index) + + pd.testing.assert_frame_equal(pd_result, bf_result) + + +def test_drop_labels_axis_0(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + pd_result = scalars_pandas_df.drop(labels=[4, 1, 2], axis=0) + bf_result = scalars_df.drop(labels=[4, 1, 2], axis=0).to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result) + + +def test_drop_index_and_columns(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + pd_result = scalars_pandas_df.drop(index=[4, 1, 2], columns="int64_col") + bf_result = scalars_df.drop(index=[4, 1, 2], columns="int64_col").to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result) + + +def test_rename(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name_dict = {"bool_col": 1.2345} + 
df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas() + pd.testing.assert_index_equal( + df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns + ) + + +def test_df_peek(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + peek_result = scalars_df.peek(n=3, force=False, allow_large_results=True) + + pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns) + assert len(peek_result) == 3 + + +def test_df_peek_with_large_results_not_allowed(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + peek_result = scalars_df.peek(n=3, force=False, allow_large_results=False) + + pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns) + assert len(peek_result) == 3 + + +def test_df_peek_filtered(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + peek_result = scalars_df[scalars_df.int64_col != 0].peek(n=3, force=False) + pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns) + assert len(peek_result) == 3 + + +def test_df_peek_force_default(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + peek_result = scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3) + pd.testing.assert_index_equal( + scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns + ) + assert len(peek_result) == 3 + + +def test_df_peek_reset_index(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + peek_result = ( + scalars_df[["int64_col", "int64_too"]].reset_index(drop=True).peek(n=3) + ) + pd.testing.assert_index_equal( + scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns + ) + assert len(peek_result) == 3 + + +def test_repr_w_all_rows(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + # Remove columns with flaky formatting, like NUMERIC columns (which use the + # object dtype). Also makes a copy so that mutating the index name doesn't + # break other tests. + scalars_df = scalars_df.drop(columns=["numeric_col"]) + scalars_pandas_df = scalars_pandas_df.drop(columns=["numeric_col"]) + + # When there are 10 or fewer rows, the outputs should be identical. + actual = repr(scalars_df.head(10)) + + with display_options.pandas_repr(bigframes.options.display): + expected = repr(scalars_pandas_df.head(10)) + + assert actual == expected + + +def test_join_repr(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + scalars_df = ( + scalars_df[["int64_col"]] + .join(scalars_df.set_index("int64_col")[["int64_too"]]) + .sort_index() + ) + scalars_pandas_df = ( + scalars_pandas_df[["int64_col"]] + .join(scalars_pandas_df.set_index("int64_col")[["int64_too"]]) + .sort_index() + ) + # Pandas join result index name seems to depend on the index values in a way that bigframes can't match exactly + scalars_pandas_df.index.name = None + + actual = repr(scalars_df) + + with display_options.pandas_repr(bigframes.options.display): + expected = repr(scalars_pandas_df) + + assert actual == expected + + +def test_repr_html_w_all_rows(scalars_dfs, session): + scalars_df, _ = scalars_dfs + # get a pandas df of the expected format + df, _ = scalars_df._block.to_pandas() + pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1) + pandas_df.index.name = scalars_df.index.name + + # When there are 10 or fewer rows, the outputs should be identical except for the extra note. 
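+    # (The extra note is the "[N rows x M columns in total]" suffix appended
+    # to the pandas HTML repr when building `expected` below.)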
+ actual = scalars_df.head(10)._repr_html_() + + with display_options.pandas_repr(bigframes.options.display): + pandas_repr = pandas_df.head(10)._repr_html_() + + expected = ( + pandas_repr + + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]" + ) + assert actual == expected + + +def test_df_column_name_with_space(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name_dict = {"bool_col": "bool col"} + df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas() + pd.testing.assert_index_equal( + df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns + ) + + +def test_df_column_name_duplicate(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name_dict = {"int64_too": "int64_col"} + df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas() + pd.testing.assert_index_equal( + df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns + ) + + +def test_get_df_column_name_duplicate(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name_dict = {"int64_too": "int64_col"} + + bf_result = scalars_df.rename(columns=col_name_dict)["int64_col"].to_pandas() + pd_result = scalars_pandas_df.rename(columns=col_name_dict)["int64_col"] + pd.testing.assert_index_equal(bf_result.columns, pd_result.columns) + + +@pytest.mark.parametrize( + ("indices", "axis"), + [ + ([1, 3, 5], 0), + ([2, 4, 6], 1), + ([1, -3, -5, -6], "index"), + ([-2, -4, -6], "columns"), + ], +) +def test_take_df(scalars_dfs, indices, axis): + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result = scalars_df.take(indices, axis=axis).to_pandas() + pd_result = scalars_pandas_df.take(indices, axis=axis) + + assert_pandas_df_equal(bf_result, pd_result) + + +def test_filter_df(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + bf_bool_series = scalars_df["bool_col"] + bf_result = scalars_df[bf_bool_series].to_pandas() + + pd_bool_series = scalars_pandas_df["bool_col"] + pd_result = scalars_pandas_df[pd_bool_series] + + assert_pandas_df_equal(bf_result, pd_result) + + +def test_assign_new_column(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + kwargs = {"new_col": 2} + df = scalars_df.assign(**kwargs) + bf_result = df.to_pandas() + pd_result = scalars_pandas_df.assign(**kwargs) + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. + pd_result["new_col"] = pd_result["new_col"].astype("Int64") + + assert_pandas_df_equal(bf_result, pd_result) + + +def test_assign_new_column_w_loc(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + bf_df.loc[:, "new_col"] = 2 + pd_df.loc[:, "new_col"] = 2 + bf_result = bf_df.to_pandas() + pd_result = pd_df + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. + pd_result["new_col"] = pd_result["new_col"].astype("Int64") + + pd.testing.assert_frame_equal(bf_result, pd_result) + + +@pytest.mark.parametrize( + ("scalar",), + [ + (2.1,), + (None,), + ], +) +def test_assign_new_column_w_setitem(scalars_dfs, scalar): + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + bf_df["new_col"] = scalar + pd_df["new_col"] = scalar + bf_result = bf_df.to_pandas() + pd_result = pd_df + + # Convert default pandas dtypes `float64` to match BigQuery DataFrames dtypes. 
+ pd_result["new_col"] = pd_result["new_col"].astype("Float64") + + pd.testing.assert_frame_equal(bf_result, pd_result) + + +def test_assign_new_column_w_setitem_dataframe(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + bf_df["int64_col"] = bf_df["int64_too"].to_frame() + pd_df["int64_col"] = pd_df["int64_too"].to_frame() + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. + pd_df["int64_col"] = pd_df["int64_col"].astype("Int64") + + pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df) + + +def test_assign_new_column_w_setitem_dataframe_error(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + + with pytest.raises(ValueError): + bf_df["impossible_col"] = bf_df[["int64_too", "string_col"]] + with pytest.raises(ValueError): + pd_df["impossible_col"] = pd_df[["int64_too", "string_col"]] + + +def test_assign_new_column_w_setitem_list(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1] + pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1] + bf_result = bf_df.to_pandas() + pd_result = pd_df + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. + pd_result["new_col"] = pd_result["new_col"].astype("Int64") + + pd.testing.assert_frame_equal(bf_result, pd_result) + + +def test_assign_new_column_w_setitem_list_repeated(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1] + pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1] + bf_df["new_col_2"] = [1, 3, 2, 5, 4, 7, 6, 9, 8] + pd_df["new_col_2"] = [1, 3, 2, 5, 4, 7, 6, 9, 8] + bf_result = bf_df.to_pandas() + pd_result = pd_df + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. + pd_result["new_col"] = pd_result["new_col"].astype("Int64") + pd_result["new_col_2"] = pd_result["new_col_2"].astype("Int64") + + pd.testing.assert_frame_equal(bf_result, pd_result) + + +def test_assign_new_column_w_setitem_list_custom_index(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + + # set the custom index + pd_df = pd_df.set_index(["string_col", "int64_col"]) + bf_df = bf_df.set_index(["string_col", "int64_col"]) + + bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1] + pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1] + bf_result = bf_df.to_pandas() + pd_result = pd_df + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. + pd_result["new_col"] = pd_result["new_col"].astype("Int64") + + pd.testing.assert_frame_equal(bf_result, pd_result) + + +def test_assign_new_column_w_setitem_list_error(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + + with pytest.raises(ValueError): + pd_df["new_col"] = [1, 2, 3] # should be len 9, is 3 + with pytest.raises(ValueError): + bf_df["new_col"] = [1, 2, 3] + + +def test_assign_existing_column(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + kwargs = {"int64_col": 2} + df = scalars_df.assign(**kwargs) + bf_result = df.to_pandas() + pd_result = scalars_pandas_df.assign(**kwargs) + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. 
+ pd_result["int64_col"] = pd_result["int64_col"].astype("Int64") + + assert_pandas_df_equal(bf_result, pd_result) + + +def test_assign_listlike_to_empty_df(session): + empty_df = dataframe.DataFrame(session=session) + empty_pandas_df = pd.DataFrame() + + bf_result = empty_df.assign(new_col=[1, 2, 3]) + pd_result = empty_pandas_df.assign(new_col=[1, 2, 3]) + + pd_result["new_col"] = pd_result["new_col"].astype("Int64") + pd_result.index = pd_result.index.astype("Int64") + assert_pandas_df_equal(bf_result.to_pandas(), pd_result) + + +def test_assign_to_empty_df_multiindex_error(session): + empty_df = dataframe.DataFrame(session=session) + empty_pandas_df = pd.DataFrame() + + empty_df["empty_col_1"] = typing.cast(series.Series, []) + empty_df["empty_col_2"] = typing.cast(series.Series, []) + empty_pandas_df["empty_col_1"] = [] + empty_pandas_df["empty_col_2"] = [] + empty_df = empty_df.set_index(["empty_col_1", "empty_col_2"]) + empty_pandas_df = empty_pandas_df.set_index(["empty_col_1", "empty_col_2"]) + + with pytest.raises(ValueError): + empty_df.assign(new_col=[1, 2, 3, 4, 5, 6, 7, 8, 9]) + with pytest.raises(ValueError): + empty_pandas_df.assign(new_col=[1, 2, 3, 4, 5, 6, 7, 8, 9]) + + +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_assign_series(scalars_dfs, ordered): + scalars_df, scalars_pandas_df = scalars_dfs + column_name = "int64_col" + df = scalars_df.assign(new_col=scalars_df[column_name]) + bf_result = df.to_pandas(ordered=ordered) + pd_result = scalars_pandas_df.assign(new_col=scalars_pandas_df[column_name]) + + assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered) + + +def test_assign_series_overwrite(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + column_name = "int64_col" + df = scalars_df.assign(**{column_name: scalars_df[column_name] + 3}) + bf_result = df.to_pandas() + pd_result = scalars_pandas_df.assign( + **{column_name: scalars_pandas_df[column_name] + 3} + ) + + assert_pandas_df_equal(bf_result, pd_result) + + +def test_assign_sequential(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + kwargs = {"int64_col": 2, "new_col": 3, "new_col2": 4} + df = scalars_df.assign(**kwargs) + bf_result = df.to_pandas() + pd_result = scalars_pandas_df.assign(**kwargs) + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. + pd_result["int64_col"] = pd_result["int64_col"].astype("Int64") + pd_result["new_col"] = pd_result["new_col"].astype("Int64") + pd_result["new_col2"] = pd_result["new_col2"].astype("Int64") + + assert_pandas_df_equal(bf_result, pd_result) + + +# Require an index so that the self-join is consistent each time. 
+def test_assign_same_table_different_index_performs_self_join( + scalars_df_index, scalars_pandas_df_index +): + column_name = "int64_col" + bf_df = scalars_df_index.assign( + alternative_index=scalars_df_index["rowindex_2"] + 2 + ) + pd_df = scalars_pandas_df_index.assign( + alternative_index=scalars_pandas_df_index["rowindex_2"] + 2 + ) + bf_df_2 = bf_df.set_index("alternative_index") + pd_df_2 = pd_df.set_index("alternative_index") + bf_result = bf_df.assign(new_col=bf_df_2[column_name] * 10).to_pandas() + pd_result = pd_df.assign(new_col=pd_df_2[column_name] * 10) + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +# Different table expression must have Index +def test_assign_different_df( + scalars_df_index, scalars_df_2_index, scalars_pandas_df_index +): + column_name = "int64_col" + df = scalars_df_index.assign(new_col=scalars_df_2_index[column_name]) + bf_result = df.to_pandas() + # Doesn't matter to pandas if it comes from the same DF or a different DF. + pd_result = scalars_pandas_df_index.assign( + new_col=scalars_pandas_df_index[column_name] + ) + + assert_pandas_df_equal(bf_result, pd_result) + + +def test_assign_different_df_w_loc( + scalars_df_index, scalars_df_2_index, scalars_pandas_df_index +): + bf_df = scalars_df_index.copy() + bf_df2 = scalars_df_2_index.copy() + pd_df = scalars_pandas_df_index.copy() + assert "int64_col" in bf_df.columns + assert "int64_col" in pd_df.columns + bf_df.loc[:, "int64_col"] = bf_df2.loc[:, "int64_col"] + 1 + pd_df.loc[:, "int64_col"] = pd_df.loc[:, "int64_col"] + 1 + bf_result = bf_df.to_pandas() + pd_result = pd_df + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. + pd_result["int64_col"] = pd_result["int64_col"].astype("Int64") + + pd.testing.assert_frame_equal(bf_result, pd_result) + + +def test_assign_different_df_w_setitem( + scalars_df_index, scalars_df_2_index, scalars_pandas_df_index +): + bf_df = scalars_df_index.copy() + bf_df2 = scalars_df_2_index.copy() + pd_df = scalars_pandas_df_index.copy() + assert "int64_col" in bf_df.columns + assert "int64_col" in pd_df.columns + bf_df["int64_col"] = bf_df2["int64_col"] + 1 + pd_df["int64_col"] = pd_df["int64_col"] + 1 + bf_result = bf_df.to_pandas() + pd_result = pd_df + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. + pd_result["int64_col"] = pd_result["int64_col"].astype("Int64") + + pd.testing.assert_frame_equal(bf_result, pd_result) + + +def test_assign_callable_lambda(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + kwargs = {"new_col": lambda x: x["int64_col"] + x["int64_too"]} + df = scalars_df.assign(**kwargs) + bf_result = df.to_pandas() + pd_result = scalars_pandas_df.assign(**kwargs) + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. 
+ pd_result["new_col"] = pd_result["new_col"].astype("Int64") + + assert_pandas_df_equal(bf_result, pd_result) + + +@pytest.mark.parametrize( + ("axis", "how", "ignore_index", "subset"), + [ + (0, "any", False, None), + (0, "any", True, None), + (0, "all", False, ["bool_col", "time_col"]), + (0, "any", False, ["bool_col", "time_col"]), + (0, "all", False, "time_col"), + (1, "any", False, None), + (1, "all", False, None), + ], +) +def test_df_dropna(scalars_dfs, axis, how, ignore_index, subset): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") + scalars_df, scalars_pandas_df = scalars_dfs + df = scalars_df.dropna(axis=axis, how=how, ignore_index=ignore_index, subset=subset) + bf_result = df.to_pandas() + pd_result = scalars_pandas_df.dropna( + axis=axis, how=how, ignore_index=ignore_index, subset=subset + ) + + # Pandas uses int64 instead of Int64 (nullable) dtype. + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_dropna_range_columns(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") + scalars_df, scalars_pandas_df = scalars_dfs + scalars_df = scalars_df.copy() + scalars_pandas_df = scalars_pandas_df.copy() + scalars_df.columns = pandas.RangeIndex(0, len(scalars_df.columns)) + scalars_pandas_df.columns = pandas.RangeIndex(0, len(scalars_pandas_df.columns)) + + df = scalars_df.dropna() + bf_result = df.to_pandas() + pd_result = scalars_pandas_df.dropna() + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_interpolate(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + columns = ["int64_col", "int64_too", "float64_col"] + bf_result = scalars_df[columns].interpolate().to_pandas() + # Pandas can only interpolate on "float64" columns + # https://github.com/pandas-dev/pandas/issues/40252 + pd_result = scalars_pandas_df[columns].astype("float64").interpolate() + + pandas.testing.assert_frame_equal( + bf_result, + pd_result, + check_index_type=False, + check_dtype=False, + ) + + +@pytest.mark.parametrize( + "col, fill_value", + [ + (["int64_col", "float64_col"], 3), + (["string_col"], "A"), + (["datetime_col"], pd.Timestamp("2023-01-01")), + ], +) +def test_df_fillna(scalars_dfs, col, fill_value): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[col].fillna(fill_value).to_pandas() + pd_result = scalars_pandas_df[col].fillna(fill_value) + + pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + + +def test_df_ffill(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[["int64_col", "float64_col"]].ffill(limit=1).to_pandas() + pd_result = scalars_pandas_df[["int64_col", "float64_col"]].ffill(limit=1) + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_bfill(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[["int64_col", "float64_col"]].bfill().to_pandas() + pd_result = scalars_pandas_df[["int64_col", "float64_col"]].bfill() + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_apply_series_series_callable( + scalars_df_index, + scalars_pandas_df_index, +): + columns = ["int64_too", "int64_col"] + + def foo(series, arg1, arg2, *, kwarg1=0, kwarg2=0): + return series**2 + (arg1 * arg2 % 4) + (kwarg1 * kwarg2 % 7) + + bf_result = ( + scalars_df_index[columns] + .apply(foo, args=(33, 
61), kwarg1=52, kwarg2=21) + .to_pandas() + ) + + pd_result = scalars_pandas_df_index[columns].apply( + foo, args=(33, 61), kwarg1=52, kwarg2=21 + ) + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_apply_series_listlike_callable( + scalars_df_index, + scalars_pandas_df_index, +): + columns = ["int64_too", "int64_col"] + bf_result = ( + scalars_df_index[columns].apply(lambda x: [len(x), x.min(), 24]).to_pandas() + ) + + pd_result = scalars_pandas_df_index[columns].apply(lambda x: [len(x), x.min(), 24]) + + # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. + pd_result.index = pd_result.index.astype("Int64") + pd_result = pd_result.astype("Int64") + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_apply_series_scalar_callable( + scalars_df_index, + scalars_pandas_df_index, +): + columns = ["int64_too", "int64_col"] + bf_result = scalars_df_index[columns].apply(lambda x: x.sum()) + + pd_result = scalars_pandas_df_index[columns].apply(lambda x: x.sum()) + + pandas.testing.assert_series_equal(bf_result, pd_result) + + +def test_df_pipe( + scalars_df_index, + scalars_pandas_df_index, +): + columns = ["int64_too", "int64_col"] + + def foo(x: int, y: int, df): + return (df + x) % y + + bf_result = ( + scalars_df_index[columns] + .pipe((foo, "df"), x=7, y=9) + .pipe(lambda x: x**2) + .to_pandas() + ) + + pd_result = ( + scalars_pandas_df_index[columns] + .pipe((foo, "df"), x=7, y=9) + .pipe(lambda x: x**2) + ) + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_keys( + scalars_df_index, + scalars_pandas_df_index, +): + pandas.testing.assert_index_equal( + scalars_df_index.keys(), scalars_pandas_df_index.keys() + ) + + +def test_df_iter( + scalars_df_index, + scalars_pandas_df_index, +): + for bf_i, df_i in zip(scalars_df_index, scalars_pandas_df_index): + assert bf_i == df_i + + +def test_iterrows( + scalars_df_index, + scalars_pandas_df_index, +): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") + scalars_df_index = scalars_df_index.add_suffix("_suffix", axis=1) + scalars_pandas_df_index = scalars_pandas_df_index.add_suffix("_suffix", axis=1) + for (bf_index, bf_series), (pd_index, pd_series) in zip( + scalars_df_index.iterrows(), scalars_pandas_df_index.iterrows() + ): + assert bf_index == pd_index + pandas.testing.assert_series_equal(bf_series, pd_series) + + +@pytest.mark.parametrize( + ( + "index", + "name", + ), + [ + ( + True, + "my_df", + ), + (False, None), + ], +) +def test_itertuples(scalars_df_index, index, name): + # Numeric has slightly different representation as a result of conversions. + bf_tuples = scalars_df_index.itertuples(index, name) + pd_tuples = scalars_df_index.to_pandas().itertuples(index, name) + for bf_tuple, pd_tuple in zip(bf_tuples, pd_tuples): + assert bf_tuple == pd_tuple + + +def test_df_cross_merge(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + left_columns = ["int64_col", "float64_col", "rowindex_2"] + right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"] + + left = scalars_df[left_columns] + # Offset the rows somewhat so that outer join can have an effect. 
+ right = scalars_df[right_columns].assign(rowindex_2=scalars_df["rowindex_2"] + 2) + + bf_result = left.merge(right, "cross").to_pandas() + + pd_result = scalars_pandas_df[left_columns].merge( + scalars_pandas_df[right_columns].assign( + rowindex_2=scalars_pandas_df["rowindex_2"] + 2 + ), + "cross", + ) + pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) + + +@pytest.mark.parametrize( + ("merge_how",), + [ + ("inner",), + ("outer",), + ("left",), + ("right",), + ], +) +def test_df_merge(scalars_dfs, merge_how): + scalars_df, scalars_pandas_df = scalars_dfs + on = "rowindex_2" + left_columns = ["int64_col", "float64_col", "rowindex_2"] + right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"] + + left = scalars_df[left_columns] + # Offset the rows somewhat so that outer join can have an effect. + right = scalars_df[right_columns].assign(rowindex_2=scalars_df["rowindex_2"] + 2) + + df = left.merge(right, merge_how, on, sort=True) + bf_result = df.to_pandas() + + pd_result = scalars_pandas_df[left_columns].merge( + scalars_pandas_df[right_columns].assign( + rowindex_2=scalars_pandas_df["rowindex_2"] + 2 + ), + merge_how, + on, + sort=True, + ) + + assert_pandas_df_equal( + bf_result, pd_result, ignore_order=True, check_index_type=False + ) + + +@pytest.mark.parametrize( + ("left_on", "right_on"), + [ + (["int64_col", "rowindex_2"], ["int64_col", "rowindex_2"]), + (["rowindex_2", "int64_col"], ["int64_col", "rowindex_2"]), + # Polars engine is currently strict on join key types + # (["rowindex_2", "float64_col"], ["int64_col", "rowindex_2"]), + ], +) +def test_df_merge_multi_key(scalars_dfs, left_on, right_on): + scalars_df, scalars_pandas_df = scalars_dfs + left_columns = ["int64_col", "float64_col", "rowindex_2"] + right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"] + + left = scalars_df[left_columns] + # Offset the rows somewhat so that outer join can have an effect. 
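+ # With the shift, a few rowindex_2 values exist on only one side, so the + # outer join below has unmatched rows to fill with NULLs.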
+ right = scalars_df[right_columns].assign(rowindex_2=scalars_df["rowindex_2"] + 2) + + df = left.merge(right, "outer", left_on=left_on, right_on=right_on, sort=True) + bf_result = df.to_pandas() + + pd_result = scalars_pandas_df[left_columns].merge( + scalars_pandas_df[right_columns].assign( + rowindex_2=scalars_pandas_df["rowindex_2"] + 2 + ), + "outer", + left_on=left_on, + right_on=right_on, + sort=True, + ) + + assert_pandas_df_equal( + bf_result, pd_result, ignore_order=True, check_index_type=False + ) + + +@pytest.mark.parametrize( + ("merge_how",), + [ + ("inner",), + ("outer",), + ("left",), + ("right",), + ], +) +def test_merge_custom_col_name(scalars_dfs, merge_how): + scalars_df, scalars_pandas_df = scalars_dfs + left_columns = ["int64_col", "float64_col"] + right_columns = ["int64_col", "bool_col", "string_col"] + on = "int64_col" + rename_columns = {"float64_col": "f64_col"} + + left = scalars_df[left_columns] + left = left.rename(columns=rename_columns) + right = scalars_df[right_columns] + df = left.merge(right, merge_how, on, sort=True) + bf_result = df.to_pandas() + + pandas_left_df = scalars_pandas_df[left_columns] + pandas_left_df = pandas_left_df.rename(columns=rename_columns) + pandas_right_df = scalars_pandas_df[right_columns] + pd_result = pandas_left_df.merge(pandas_right_df, merge_how, on, sort=True) + + assert_pandas_df_equal( + bf_result, pd_result, ignore_order=True, check_index_type=False + ) + + +@pytest.mark.parametrize( + ("merge_how",), + [ + ("inner",), + ("outer",), + ("left",), + ("right",), + ], +) +def test_merge_left_on_right_on(scalars_dfs, merge_how): + scalars_df, scalars_pandas_df = scalars_dfs + left_columns = ["int64_col", "float64_col", "int64_too"] + right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"] + + left = scalars_df[left_columns] + right = scalars_df[right_columns] + + df = left.merge( + right, merge_how, left_on="int64_too", right_on="rowindex_2", sort=True + ) + bf_result = df.to_pandas() + + pd_result = scalars_pandas_df[left_columns].merge( + scalars_pandas_df[right_columns], + merge_how, + left_on="int64_too", + right_on="rowindex_2", + sort=True, + ) + + assert_pandas_df_equal( + bf_result, pd_result, ignore_order=True, check_index_type=False + ) + + +def test_shape(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df.shape + pd_result = scalars_pandas_df.shape + + assert bf_result == pd_result + + +def test_len(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = len(scalars_df) + pd_result = len(scalars_pandas_df) + + assert bf_result == pd_result + + +@pytest.mark.parametrize( + ("n_rows",), + [ + (50,), + (10000,), + ], +) +def test_df_len_local(session, n_rows): + assert ( + len( + session.read_pandas( + pd.DataFrame(np.random.randint(1, 7, n_rows), columns=["one"]), + ) + ) + == n_rows + ) + + +def test_size(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df.size + pd_result = scalars_pandas_df.size + + assert bf_result == pd_result + + +def test_ndim(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df.ndim + pd_result = scalars_pandas_df.ndim + + assert bf_result == pd_result + + +def test_empty_false(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result = scalars_df.empty + pd_result = scalars_pandas_df.empty + + assert bf_result == pd_result + + +def test_empty_true_column_filter(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result = 
scalars_df[[]].empty + pd_result = scalars_pandas_df[[]].empty + + assert bf_result == pd_result + + +def test_empty_true_row_filter(scalars_dfs: Tuple[dataframe.DataFrame, pd.DataFrame]): + scalars_df, scalars_pandas_df = scalars_dfs + bf_bool: series.Series = typing.cast(series.Series, scalars_df["bool_col"]) + pd_bool: pd.Series = scalars_pandas_df["bool_col"] + bf_false = bf_bool.notna() & (bf_bool != bf_bool) + pd_false = pd_bool.notna() & (pd_bool != pd_bool) + + bf_result = scalars_df[bf_false].empty + pd_result = scalars_pandas_df[pd_false].empty + + assert pd_result + assert bf_result == pd_result + + +def test_empty_true_memtable(session: bigframes.Session): + bf_df = dataframe.DataFrame(session=session) + pd_df = pd.DataFrame() + + bf_result = bf_df.empty + pd_result = pd_df.empty + + assert pd_result + assert bf_result == pd_result + + +@pytest.mark.parametrize( + ("drop",), + ((True,), (False,)), +) +def test_reset_index(scalars_df_index, scalars_pandas_df_index, drop): + df = scalars_df_index.reset_index(drop=drop) + assert df.index.name is None + + bf_result = df.to_pandas() + pd_result = scalars_pandas_df_index.reset_index(drop=drop) + + # Pandas uses int64 instead of Int64 (nullable) dtype. + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + + # reset_index should maintain the original ordering. + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_reset_index_then_filter( + scalars_df_index, + scalars_pandas_df_index, +): + bf_filter = scalars_df_index["bool_col"].fillna(True) + bf_df = scalars_df_index.reset_index()[bf_filter] + bf_result = bf_df.to_pandas() + pd_filter = scalars_pandas_df_index["bool_col"].fillna(True) + pd_result = scalars_pandas_df_index.reset_index()[pd_filter] + + # Pandas uses int64 instead of Int64 (nullable) dtype. + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + + # reset_index should maintain the original ordering, and the index + # keys will have gaps post-filter. + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_reset_index_with_unnamed_index( + scalars_df_index, + scalars_pandas_df_index, +): + scalars_df_index = scalars_df_index.copy() + scalars_pandas_df_index = scalars_pandas_df_index.copy() + + scalars_df_index.index.name = None + scalars_pandas_df_index.index.name = None + df = scalars_df_index.reset_index(drop=False) + assert df.index.name is None + + # reset_index(drop=False) creates a new column "index". + assert df.columns[0] == "index" + + bf_result = df.to_pandas() + pd_result = scalars_pandas_df_index.reset_index(drop=False) + + # Pandas uses int64 instead of Int64 (nullable) dtype. + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + + # reset_index should maintain the original ordering. 
+ pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_reset_index_with_unnamed_multiindex( + scalars_df_index, + scalars_pandas_df_index, +): + bf_df = dataframe.DataFrame( + ([1, 2, 3], [2, 5, 7]), + index=pd.MultiIndex.from_tuples([("a", "aa"), ("a", "aa")]), + ) + pd_df = pd.DataFrame( + ([1, 2, 3], [2, 5, 7]), + index=pd.MultiIndex.from_tuples([("a", "aa"), ("a", "aa")]), + ) + + bf_df = bf_df.reset_index() + pd_df = pd_df.reset_index() + + assert pd_df.columns[0] == "level_0" + assert bf_df.columns[0] == "level_0" + assert pd_df.columns[1] == "level_1" + assert bf_df.columns[1] == "level_1" + + +def test_reset_index_with_unnamed_index_and_index_column( + scalars_df_index, + scalars_pandas_df_index, +): + scalars_df_index = scalars_df_index.copy() + scalars_pandas_df_index = scalars_pandas_df_index.copy() + + scalars_df_index.index.name = None + scalars_pandas_df_index.index.name = None + df = scalars_df_index.assign(index=scalars_df_index["int64_col"]).reset_index( + drop=False + ) + assert df.index.name is None + + # reset_index(drop=False) creates a new column "level_0" if the "index" column already exists. + assert df.columns[0] == "level_0" + + bf_result = df.to_pandas() + pd_result = scalars_pandas_df_index.assign( + index=scalars_pandas_df_index["int64_col"] + ).reset_index(drop=False) + + # Pandas uses int64 instead of Int64 (nullable) dtype. + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + + # reset_index should maintain the original ordering. + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +@pytest.mark.parametrize( + ("drop",), + ( + (True,), + (False,), + ), +) +@pytest.mark.parametrize( + ("append",), + ( + (True,), + (False,), + ), +) +@pytest.mark.parametrize( + ("index_column",), + (("int64_too",), ("string_col",), ("timestamp_col",)), +) +def test_set_index(scalars_dfs, index_column, drop, append): + scalars_df, scalars_pandas_df = scalars_dfs + df = scalars_df.set_index(index_column, append=append, drop=drop) + bf_result = df.to_pandas() + pd_result = scalars_pandas_df.set_index(index_column, append=append, drop=drop) + + # Sort to disambiguate when there are duplicate index labels. + # Note: Doesn't use assert_pandas_df_equal_ignore_ordering because we get + # "ValueError: 'timestamp_col' is both an index level and a column label, + # which is ambiguous" when trying to sort by a column with the same name as + # the index. 
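+ # Sorting on "rowindex_2", which is never one of the parametrized index + # columns, keeps the ordering deterministic without triggering that ambiguity.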
+ bf_result = bf_result.sort_values("rowindex_2") + pd_result = pd_result.sort_values("rowindex_2") + + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_set_index_key_error(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + with pytest.raises(KeyError): + scalars_pandas_df.set_index(["not_a_col"]) + with pytest.raises(KeyError): + scalars_df.set_index(["not_a_col"]) + + +@pytest.mark.parametrize( + ("ascending",), + ((True,), (False,)), +) +@pytest.mark.parametrize( + ("na_position",), + (("first",), ("last",)), +) +def test_sort_index(scalars_dfs, ascending, na_position): + index_column = "int64_col" + scalars_df, scalars_pandas_df = scalars_dfs + df = scalars_df.set_index(index_column) + bf_result = df.sort_index(ascending=ascending, na_position=na_position).to_pandas() + pd_result = scalars_pandas_df.set_index(index_column).sort_index( + ascending=ascending, na_position=na_position + ) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_dataframe_sort_index_inplace(scalars_dfs): + index_column = "int64_col" + scalars_df, scalars_pandas_df = scalars_dfs + df = scalars_df.copy().set_index(index_column) + df.sort_index(ascending=False, inplace=True) + bf_result = df.to_pandas() + + pd_result = scalars_pandas_df.set_index(index_column).sort_index(ascending=False) + pandas.testing.assert_frame_equal(bf_result, pd_result) + + +def test_df_abs(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + columns = ["int64_col", "int64_too", "float64_col"] + + bf_result = scalars_df[columns].abs() + pd_result = scalars_pandas_df[columns].abs() + + assert_dfs_equivalent(pd_result, bf_result) + + +def test_df_pos(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = (+scalars_df[["int64_col", "numeric_col"]]).to_pandas() + pd_result = +scalars_pandas_df[["int64_col", "numeric_col"]] + + assert_pandas_df_equal(pd_result, bf_result) + + +def test_df_neg(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = (-scalars_df[["int64_col", "numeric_col"]]).to_pandas() + pd_result = -scalars_pandas_df[["int64_col", "numeric_col"]] + + assert_pandas_df_equal(pd_result, bf_result) + + +def test_df_invert(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + columns = ["int64_col", "bool_col"] + + bf_result = (~scalars_df[columns]).to_pandas() + pd_result = ~scalars_pandas_df[columns] + + assert_pandas_df_equal(bf_result, pd_result) + + +def test_df_isnull(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + columns = ["int64_col", "int64_too", "string_col", "bool_col"] + bf_result = scalars_df[columns].isnull().to_pandas() + pd_result = scalars_pandas_df[columns].isnull() + + # One of the dtype mismatches to be documented. Here, the `bf_result.dtype` is + # `BooleanDtype` but the `pd_result.dtype` is `bool`. + pd_result["int64_col"] = pd_result["int64_col"].astype(pd.BooleanDtype()) + pd_result["int64_too"] = pd_result["int64_too"].astype(pd.BooleanDtype()) + pd_result["string_col"] = pd_result["string_col"].astype(pd.BooleanDtype()) + pd_result["bool_col"] = pd_result["bool_col"].astype(pd.BooleanDtype()) + + assert_pandas_df_equal(bf_result, pd_result) + + +def test_df_notnull(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + columns = ["int64_col", "int64_too", "string_col", "bool_col"] + bf_result = scalars_df[columns].notnull().to_pandas() + pd_result = scalars_pandas_df[columns].notnull() + + # One of the dtype mismatches to be documented. 
Here, the `bf_result.dtype` is + # `BooleanDtype` but the `pd_result.dtype` is `bool`. + pd_result["int64_col"] = pd_result["int64_col"].astype(pd.BooleanDtype()) + pd_result["int64_too"] = pd_result["int64_too"].astype(pd.BooleanDtype()) + pd_result["string_col"] = pd_result["string_col"].astype(pd.BooleanDtype()) + pd_result["bool_col"] = pd_result["bool_col"].astype(pd.BooleanDtype()) + + assert_pandas_df_equal(bf_result, pd_result) + + +@pytest.mark.parametrize( + ("left_labels", "right_labels", "overwrite", "fill_value"), + [ + (["a", "b", "c"], ["c", "a", "b"], True, None), + (["a", "b", "c"], ["c", "a", "b"], False, None), + (["a", "b", "c"], ["a", "b", "c"], False, 2), + ], + ids=[ + "one_one_match_overwrite", + "one_one_match_no_overwrite", + "exact_match", + ], +) +def test_combine( + scalars_df_index, + scalars_df_2_index, + scalars_pandas_df_index, + left_labels, + right_labels, + overwrite, + fill_value, +): + if pd.__version__.startswith("1."): + pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.") + columns = ["int64_too", "int64_col", "float64_col"] + + bf_df_a = scalars_df_index[columns] + bf_df_a.columns = left_labels + bf_df_b = scalars_df_2_index[columns] + bf_df_b.columns = right_labels + bf_result = bf_df_a.combine( + bf_df_b, + lambda x, y: x**2 + 2 * x * y + y**2, + overwrite=overwrite, + fill_value=fill_value, + ).to_pandas() + + pd_df_a = scalars_pandas_df_index[columns] + pd_df_a.columns = left_labels + pd_df_b = scalars_pandas_df_index[columns] + pd_df_b.columns = right_labels + pd_result = pd_df_a.combine( + pd_df_b, + lambda x, y: x**2 + 2 * x * y + y**2, + overwrite=overwrite, + fill_value=fill_value, + ) + + # Some dtype inconsistency for all-NULL columns + pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + + +@pytest.mark.parametrize( + ("overwrite", "filter_func"), + [ + (True, None), + (False, None), + (True, lambda x: x.isna() | (x % 2 == 0)), + ], + ids=[ + "default", + "overwritefalse", + "customfilter", + ], +) +def test_df_update(overwrite, filter_func): + if pd.__version__.startswith("1."): + pytest.skip("dtype handled differently in pandas 1.x.") + + index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64") + + index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64") + pd_df1 = pandas.DataFrame( + {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1 + ) + pd_df2 = pandas.DataFrame( + {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]}, + dtype="Int64", + index=index2, + ) + + bf_df1 = dataframe.DataFrame(pd_df1) + bf_df2 = dataframe.DataFrame(pd_df2) + + bf_df1.update(bf_df2, overwrite=overwrite, filter_func=filter_func) + pd_df1.update(pd_df2, overwrite=overwrite, filter_func=filter_func) + + pd.testing.assert_frame_equal(bf_df1.to_pandas(), pd_df1) + + +def test_df_idxmin(): + pd_df = pd.DataFrame( + {"a": [1, 2, 3], "b": [7, None, 3], "c": [4, 4, 4]}, index=["x", "y", "z"] + ) + bf_df = dataframe.DataFrame(pd_df) + + bf_result = bf_df.idxmin().to_pandas() + pd_result = pd_df.idxmin() + + pd.testing.assert_series_equal( + bf_result, pd_result, check_index_type=False, check_dtype=False + ) + + +def test_df_idxmax(): + pd_df = pd.DataFrame( + {"a": [1, 2, 3], "b": [7, None, 3], "c": [4, 4, 4]}, index=["x", "y", "z"] + ) + bf_df = dataframe.DataFrame(pd_df) + + bf_result = bf_df.idxmax().to_pandas() + pd_result = pd_df.idxmax() + + pd.testing.assert_series_equal( + bf_result, pd_result, check_index_type=False, check_dtype=False + ) + + +@pytest.mark.parametrize( + ("join", "axis"), + 
[ + ("outer", None), + ("outer", 0), + ("outer", 1), + ("left", 0), + ("right", 1), + ("inner", None), + ("inner", 1), + ], +) +def test_df_align(join, axis): + + index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64") + + index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64") + pd_df1 = pandas.DataFrame( + {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1 + ) + pd_df2 = pandas.DataFrame( + {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]}, + dtype="Int64", + index=index2, + ) + + bf_df1 = dataframe.DataFrame(pd_df1) + bf_df2 = dataframe.DataFrame(pd_df2) + + bf_result1, bf_result2 = bf_df1.align(bf_df2, join=join, axis=axis) + pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis) + + # Don't check dtype as pandas does unnecessary float conversion + assert isinstance(bf_result1, dataframe.DataFrame) and isinstance( + bf_result2, dataframe.DataFrame + ) + pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False) + pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False) + + +def test_combine_first( + scalars_df_index, + scalars_df_2_index, + scalars_pandas_df_index, +): + if pd.__version__.startswith("1."): + pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.") + columns = ["int64_too", "int64_col", "float64_col"] + + bf_df_a = scalars_df_index[columns].iloc[0:6] + bf_df_a.columns = ["a", "b", "c"] + bf_df_b = scalars_df_2_index[columns].iloc[2:8] + bf_df_b.columns = ["b", "a", "d"] + bf_result = bf_df_a.combine_first(bf_df_b).to_pandas() + + pd_df_a = scalars_pandas_df_index[columns].iloc[0:6] + pd_df_a.columns = ["a", "b", "c"] + pd_df_b = scalars_pandas_df_index[columns].iloc[2:8] + pd_df_b.columns = ["b", "a", "d"] + pd_result = pd_df_a.combine_first(pd_df_b) + + # Some dtype inconsistency for all-NULL columns + pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + + +def test_df_corr_w_invalid_parameters(scalars_dfs): + columns = ["int64_too", "int64_col", "float64_col"] + scalars_df, _ = scalars_dfs + + with pytest.raises(NotImplementedError): + scalars_df[columns].corr(method="kendall") + + with pytest.raises(NotImplementedError): + scalars_df[columns].corr(min_periods=1) + + +@pytest.mark.parametrize( + ("columns", "numeric_only"), + [ + (["bool_col", "int64_col", "float64_col"], True), + (["bool_col", "int64_col", "float64_col"], False), + (["bool_col", "int64_col", "float64_col", "string_col"], True), + pytest.param( + ["bool_col", "int64_col", "float64_col", "string_col"], + False, + marks=pytest.mark.xfail( + raises=NotImplementedError, + ), + ), + ], +) +def test_cov_w_numeric_only(scalars_dfs, columns, numeric_only): + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[columns].cov(numeric_only=numeric_only).to_pandas() + pd_result = scalars_pandas_df[columns].cov(numeric_only=numeric_only) + # BigFrames and Pandas differ in their data type handling: + # - Column types: BigFrames uses Float64, Pandas uses float64. + # - Index types: BigFrames uses strign, Pandas uses object. + pd.testing.assert_index_equal(bf_result.columns, pd_result.columns) + # Only check row order in ordered mode. 
+ pd.testing.assert_frame_equal( + bf_result, + pd_result, + check_dtype=False, + check_index_type=False, + check_like=not scalars_df._block.session._strictly_ordered, + ) + + +def test_df_corrwith_df(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + l_cols = ["int64_col", "float64_col", "int64_too"] + r_cols = ["int64_too", "float64_col"] + + bf_result = scalars_df[l_cols].corrwith(scalars_df[r_cols]).to_pandas() + pd_result = scalars_pandas_df[l_cols].corrwith(scalars_pandas_df[r_cols]) + + # BigFrames and Pandas differ in their data type handling: + # - Column types: BigFrames uses Float64, Pandas uses float64. + # - Index types: BigFrames uses string, Pandas uses object. + pd.testing.assert_series_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + +def test_df_corrwith_df_numeric_only(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + l_cols = ["int64_col", "float64_col", "int64_too", "string_col"] + r_cols = ["int64_too", "float64_col", "bool_col"] + + bf_result = ( + scalars_df[l_cols].corrwith(scalars_df[r_cols], numeric_only=True).to_pandas() + ) + pd_result = scalars_pandas_df[l_cols].corrwith( + scalars_pandas_df[r_cols], numeric_only=True + ) + + # BigFrames and Pandas differ in their data type handling: + # - Column types: BigFrames uses Float64, Pandas uses float64. + # - Index types: BigFrames uses string, Pandas uses object. + pd.testing.assert_series_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + +def test_df_corrwith_df_non_numeric_error(scalars_dfs): + scalars_df, _ = scalars_dfs + + l_cols = ["int64_col", "float64_col", "int64_too", "string_col"] + r_cols = ["int64_too", "float64_col", "bool_col"] + + with pytest.raises(NotImplementedError): + scalars_df[l_cols].corrwith(scalars_df[r_cols], numeric_only=False) + + +def test_df_corrwith_series(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") + scalars_df, scalars_pandas_df = scalars_dfs + + l_cols = ["int64_col", "float64_col", "int64_too"] + r_col = "float64_col" + + bf_result = scalars_df[l_cols].corrwith(scalars_df[r_col]).to_pandas() + pd_result = scalars_pandas_df[l_cols].corrwith(scalars_pandas_df[r_col]) + + # BigFrames and Pandas differ in their data type handling: + # - Column types: BigFrames uses Float64, Pandas uses float64. + # - Index types: BigFrames uses string, Pandas uses object. 
+ pd.testing.assert_series_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + +@pytest.mark.parametrize( + ("op"), + [ + operator.add, + operator.sub, + operator.mul, + operator.truediv, + # operator.floordiv, + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.lt, + operator.le, + ], + ids=[ + "add", + "subtract", + "multiply", + "true_divide", + # "floor_divide", + "eq", + "ne", + "gt", + "ge", + "lt", + "le", + ], +) +# TODO(garrettwu): deal with NA values +@pytest.mark.parametrize(("other_scalar"), [1, 2.5, 0, 0.0]) +@pytest.mark.parametrize(("reverse_operands"), [True, False]) +def test_scalar_binop(scalars_dfs, op, other_scalar, reverse_operands): + scalars_df, scalars_pandas_df = scalars_dfs + columns = ["int64_col", "float64_col"] + + maybe_reversed_op = (lambda x, y: op(y, x)) if reverse_operands else op + + bf_result = maybe_reversed_op(scalars_df[columns], other_scalar).to_pandas() + pd_result = maybe_reversed_op(scalars_pandas_df[columns], other_scalar) + + assert_pandas_df_equal(bf_result, pd_result) + + +@pytest.mark.parametrize(("other_scalar"), [1, -2]) +def test_mod(scalars_dfs, other_scalar): + # Zero case excluded as pandas produces 0 result for Int64 inputs rather than NA/NaN. + # This is likely a pandas bug, as mod 0 is undefined in other dtypes and in most programming languages. + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result = (scalars_df[["int64_col", "int64_too"]] % other_scalar).to_pandas() + pd_result = scalars_pandas_df[["int64_col", "int64_too"]] % other_scalar + + assert_pandas_df_equal(bf_result, pd_result) + + +def test_scalar_binop_str_exception(scalars_dfs): + scalars_df, _ = scalars_dfs + columns = ["string_col"] + with pytest.raises(TypeError, match="Cannot add dtypes"): + (scalars_df[columns] + 1).to_pandas() + + +@pytest.mark.parametrize( + ("op"), + [ + (lambda x, y: x.add(y, axis="index")), + (lambda x, y: x.radd(y, axis="index")), + (lambda x, y: x.sub(y, axis="index")), + (lambda x, y: x.rsub(y, axis="index")), + (lambda x, y: x.mul(y, axis="index")), + (lambda x, y: x.rmul(y, axis="index")), + (lambda x, y: x.truediv(y, axis="index")), + (lambda x, y: x.rtruediv(y, axis="index")), + # (lambda x, y: x.floordiv(y, axis="index")), + # (lambda x, y: x.rfloordiv(y, axis="index")), + (lambda x, y: x.gt(y, axis="index")), + (lambda x, y: x.ge(y, axis="index")), + (lambda x, y: x.lt(y, axis="index")), + (lambda x, y: x.le(y, axis="index")), + ], + ids=[ + "add", + "radd", + "sub", + "rsub", + "mul", + "rmul", + "truediv", + "rtruediv", + # "floordiv", + # "rfloordiv", + "gt", + "ge", + "lt", + "le", + ], +) +def test_series_binop_axis_index( + scalars_dfs, + op, +): + scalars_df, scalars_pandas_df = scalars_dfs + df_columns = ["int64_col", "float64_col"] + series_column = "int64_too" + + bf_result = op(scalars_df[df_columns], scalars_df[series_column]).to_pandas() + pd_result = op(scalars_pandas_df[df_columns], scalars_pandas_df[series_column]) + + assert_pandas_df_equal(bf_result, pd_result) + + +@pytest.mark.parametrize( + ("input"), + [ + ((1000, 2000, 3000)), + (pd.Index([1000, 2000, 3000])), + (pd.Series((1000, 2000), index=["int64_too", "float64_col"])), + ], + ids=[ + "tuple", + "pd_index", + "pd_series", + ], +) +def test_listlike_binop_axis_1_in_memory_data(scalars_dfs, input): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") + scalars_df, scalars_pandas_df = scalars_dfs + + df_columns = ["int64_col", "float64_col", 
"int64_too"] + + bf_result = scalars_df[df_columns].add(input, axis=1).to_pandas() + if hasattr(input, "to_pandas"): + input = input.to_pandas() + pd_result = scalars_pandas_df[df_columns].add(input, axis=1) + + assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + + +def test_df_reverse_binop_pandas(scalars_dfs): + # TODO: supply a reason why this isn't compatible with pandas 1.x + pytest.importorskip("pandas", minversion="2.0.0") + scalars_df, scalars_pandas_df = scalars_dfs + + pd_series = pd.Series([100, 200, 300]) + + df_columns = ["int64_col", "float64_col", "int64_too"] + + bf_result = pd_series + scalars_df[df_columns].to_pandas() + pd_result = pd_series + scalars_pandas_df[df_columns] + + assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + + +def test_listlike_binop_axis_1_bf_index(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + df_columns = ["int64_col", "float64_col", "int64_too"] + + bf_result = ( + scalars_df[df_columns] + .add(bf_indexes.Index([1000, 2000, 3000]), axis=1) + .to_pandas() + ) + pd_result = scalars_pandas_df[df_columns].add(pd.Index([1000, 2000, 3000]), axis=1) + + assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + + +def test_binop_with_self_aggregate(session, scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + + df_columns = ["int64_col", "float64_col", "int64_too"] + + bf_df = scalars_df[df_columns] + bf_result = (bf_df - bf_df.mean()).to_pandas() + + pd_df = scalars_pandas_df[df_columns] + pd_result = pd_df - pd_df.mean() + + assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + + +@pytest.mark.parametrize( + ("left_labels", "right_labels"), + [ + (["a", "a", "b"], ["c", "c", "d"]), + (["a", "b", "c"], ["c", "a", "b"]), + (["a", "c", "c"], ["c", "a", "c"]), + (["a", "b", "c"], ["a", "b", "c"]), + ], + ids=[ + "no_overlap", + "one_one_match", + "multi_match", + "exact_match", + ], +) +def test_binop_df_df_binary_op( + scalars_df_index, + scalars_df_2_index, + scalars_pandas_df_index, + left_labels, + right_labels, +): + if pd.__version__.startswith("1."): + pytest.skip("pd.NA vs NaN not handled well in pandas 1.x.") + columns = ["int64_too", "int64_col", "float64_col"] + + bf_df_a = scalars_df_index[columns] + bf_df_a.columns = left_labels + bf_df_b = scalars_df_2_index[columns] + bf_df_b.columns = right_labels + bf_result = (bf_df_a - bf_df_b).to_pandas() + + pd_df_a = scalars_pandas_df_index[columns] + pd_df_a.columns = left_labels + pd_df_b = scalars_pandas_df_index[columns] + pd_df_b.columns = right_labels + pd_result = pd_df_a - pd_df_b + + # Some dtype inconsistency for all-NULL columns + pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + + +# Differnt table will only work for explicit index, since default index orders are arbitrary. 
+@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_series_binop_add_different_table( + scalars_df_index, scalars_pandas_df_index, scalars_df_2_index, ordered +): + df_columns = ["int64_col", "float64_col"] + series_column = "int64_too" + + bf_result = ( + scalars_df_index[df_columns] + .add(scalars_df_2_index[series_column], axis="index") + .to_pandas(ordered=ordered) + ) + pd_result = scalars_pandas_df_index[df_columns].add( + scalars_pandas_df_index[series_column], axis="index" + ) + + assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered) + + +# TODO(garrettwu): Test series binop with different index + +all_joins = pytest.mark.parametrize( + ("how",), + (("outer",), ("left",), ("right",), ("inner",), ("cross",)), +) + + +@all_joins +def test_join_same_table(scalars_dfs, how): + bf_df, pd_df = scalars_dfs + if not bf_df._session._strictly_ordered and how == "cross": + pytest.skip("Cross join not supported in partial ordering mode.") + + bf_df_a = bf_df.set_index("int64_too")[["string_col", "int64_col"]] + bf_df_a = bf_df_a.sort_index() + + bf_df_b = bf_df.set_index("int64_too")[["float64_col"]] + bf_df_b = bf_df_b[bf_df_b.float64_col > 0] + bf_df_b = bf_df_b.sort_values("float64_col") + + bf_result = bf_df_a.join(bf_df_b, how=how).to_pandas() + + pd_df_a = pd_df.set_index("int64_too")[["string_col", "int64_col"]].sort_index() + pd_df_a = pd_df_a.sort_index() + + pd_df_b = pd_df.set_index("int64_too")[["float64_col"]] + pd_df_b = pd_df_b[pd_df_b.float64_col > 0] + pd_df_b = pd_df_b.sort_values("float64_col") + + pd_result = pd_df_a.join(pd_df_b, how=how) + + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) + + +@all_joins +def test_join_different_table( + scalars_df_index, scalars_df_2_index, scalars_pandas_df_index, how +): + bf_df_a = scalars_df_index[["string_col", "int64_col"]] + bf_df_b = scalars_df_2_index.dropna()[["float64_col"]] + bf_result = bf_df_a.join(bf_df_b, how=how).to_pandas() + pd_df_a = scalars_pandas_df_index[["string_col", "int64_col"]] + pd_df_b = scalars_pandas_df_index.dropna()[["float64_col"]] + pd_result = pd_df_a.join(pd_df_b, how=how) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) + + +def test_join_duplicate_columns_raises_not_implemented(scalars_dfs): + scalars_df, _ = scalars_dfs + df_a = scalars_df[["string_col", "float64_col"]] + df_b = scalars_df[["float64_col"]] + with pytest.raises(NotImplementedError): + df_a.join(df_b, how="outer").to_pandas() + + +@all_joins +def test_join_param_on(scalars_dfs, how): + bf_df, pd_df = scalars_dfs + + bf_df_a = bf_df[["string_col", "int64_col", "rowindex_2"]] + bf_df_a = bf_df_a.assign(rowindex_2=bf_df_a["rowindex_2"] + 2) + bf_df_b = bf_df[["float64_col"]] + + if how == "cross": + with pytest.raises(ValueError): + bf_df_a.join(bf_df_b, on="rowindex_2", how=how) + else: + bf_result = bf_df_a.join(bf_df_b, on="rowindex_2", how=how).to_pandas() + + pd_df_a = pd_df[["string_col", "int64_col", "rowindex_2"]] + pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2) + pd_df_b = pd_df[["float64_col"]] + pd_result = pd_df_a.join(pd_df_b, on="rowindex_2", how=how) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) + + +@all_joins +def test_df_join_series(scalars_dfs, how): + bf_df, pd_df = scalars_dfs + + bf_df_a = bf_df[["string_col", "int64_col", "rowindex_2"]] + bf_df_a = bf_df_a.assign(rowindex_2=bf_df_a["rowindex_2"] + 2) + bf_series_b = bf_df["float64_col"] + + if how == "cross": + with pytest.raises(ValueError): + 
bf_df_a.join(bf_series_b, on="rowindex_2", how=how) + else: + bf_result = bf_df_a.join(bf_series_b, on="rowindex_2", how=how).to_pandas() + + pd_df_a = pd_df[["string_col", "int64_col", "rowindex_2"]] + pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2) + pd_series_b = pd_df["float64_col"] + pd_result = pd_df_a.join(pd_series_b, on="rowindex_2", how=how) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) + + +@pytest.mark.parametrize( + ("by", "ascending", "na_position"), + [ + ("int64_col", True, "first"), + (["bool_col", "int64_col"], True, "last"), + ("int64_col", False, "first"), + (["bool_col", "int64_col"], [False, True], "last"), + (["bool_col", "int64_col"], [True, False], "first"), + ], +) +def test_dataframe_sort_values( + scalars_df_index, scalars_pandas_df_index, by, ascending, na_position +): + # Test needs values to be unique + bf_result = scalars_df_index.sort_values( + by, ascending=ascending, na_position=na_position + ).to_pandas() + pd_result = scalars_pandas_df_index.sort_values( + by, ascending=ascending, na_position=na_position + ) + + pandas.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +@pytest.mark.parametrize( + ("by", "ascending", "na_position"), + [ + ("int64_col", True, "first"), + (["bool_col", "int64_col"], True, "last"), + ], +) +def test_dataframe_sort_values_inplace( + scalars_df_index, scalars_pandas_df_index, by, ascending, na_position +): + # Test needs values to be unique + bf_sorted = scalars_df_index.copy() + bf_sorted.sort_values( + by, ascending=ascending, na_position=na_position, inplace=True + ) + bf_result = bf_sorted.to_pandas() + pd_result = scalars_pandas_df_index.sort_values( + by, ascending=ascending, na_position=na_position + ) + + pandas.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_dataframe_sort_values_invalid_input(scalars_df_index): + with pytest.raises(KeyError): + scalars_df_index.sort_values(by=scalars_df_index["int64_col"]) + + +def test_dataframe_sort_values_stable(scalars_df_index, scalars_pandas_df_index): + bf_result = ( + scalars_df_index.sort_values("int64_col", kind="stable") + .sort_values("bool_col", kind="stable") + .to_pandas() + ) + pd_result = scalars_pandas_df_index.sort_values( + "int64_col", kind="stable" + ).sort_values("bool_col", kind="stable") + + pandas.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +@pytest.mark.parametrize( + ("operator", "columns"), + [ + pytest.param(lambda x: x.cumsum(), ["float64_col", "int64_too"]), + # pytest.param(lambda x: x.cumprod(), ["float64_col", "int64_too"]), + pytest.param( + lambda x: x.cumprod(), + ["string_col"], + marks=pytest.mark.xfail( + raises=ValueError, + ), + ), + ], + ids=[ + "cumsum", + # "cumprod", + "non-numeric", + ], +) +def test_dataframe_numeric_analytic_op( + scalars_df_index, scalars_pandas_df_index, operator, columns +): + # TODO: Add nullable ints (pandas 1.x has poor behavior on these) + bf_series = operator(scalars_df_index[columns]) + pd_series = operator(scalars_pandas_df_index[columns]) + bf_result = bf_series.to_pandas() + pd.testing.assert_frame_equal(pd_series, bf_result, check_dtype=False) + + +@pytest.mark.parametrize( + ("operator"), + [ + (lambda x: x.cummin()), + (lambda x: x.cummax()), + (lambda x: x.shift(2)), + (lambda x: x.shift(-2)), + ], + ids=[ + "cummin", + "cummax", + "shiftpositive", + "shiftnegative", + ], +) +def test_dataframe_general_analytic_op( + scalars_df_index, scalars_pandas_df_index, operator +): + col_names = ["int64_too", "float64_col", 
"int64_col", "bool_col"] + bf_series = operator(scalars_df_index[col_names]) + pd_series = operator(scalars_pandas_df_index[col_names]) + bf_result = bf_series.to_pandas() + pd.testing.assert_frame_equal( + pd_series, + bf_result, + ) + + +@pytest.mark.parametrize( + ("periods",), + [ + (1,), + (2,), + (-1,), + ], +) +def test_dataframe_diff(scalars_df_index, scalars_pandas_df_index, periods): + col_names = ["int64_too", "float64_col", "int64_col"] + bf_result = scalars_df_index[col_names].diff(periods=periods).to_pandas() + pd_result = scalars_pandas_df_index[col_names].diff(periods=periods) + pd.testing.assert_frame_equal( + pd_result, + bf_result, + ) + + +@pytest.mark.parametrize( + ("periods",), + [ + (1,), + (2,), + (-1,), + ], +) +def test_dataframe_pct_change(scalars_df_index, scalars_pandas_df_index, periods): + col_names = ["int64_too", "float64_col", "int64_col"] + bf_result = scalars_df_index[col_names].pct_change(periods=periods).to_pandas() + pd_result = scalars_pandas_df_index[col_names].pct_change(periods=periods) + pd.testing.assert_frame_equal( + pd_result, + bf_result, + ) + + +def test_dataframe_agg_single_string(scalars_dfs): + numeric_cols = ["int64_col", "int64_too", "float64_col"] + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result = scalars_df[numeric_cols].agg("sum").to_pandas() + pd_result = scalars_pandas_df[numeric_cols].agg("sum") + + assert bf_result.dtype == "Float64" + pd.testing.assert_series_equal( + pd_result, bf_result, check_dtype=False, check_index_type=False + ) + + +@pytest.mark.parametrize( + ("agg",), + ( + ("sum",), + ("size",), + ), +) +def test_dataframe_agg_int_single_string(scalars_dfs, agg): + numeric_cols = ["int64_col", "int64_too", "bool_col"] + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result = scalars_df[numeric_cols].agg(agg).to_pandas() + pd_result = scalars_pandas_df[numeric_cols].agg(agg) + + assert bf_result.dtype == "Int64" + pd.testing.assert_series_equal( + pd_result, bf_result, check_dtype=False, check_index_type=False + ) + + +def test_dataframe_agg_multi_string(scalars_dfs): + numeric_cols = ["int64_col", "int64_too", "float64_col"] + aggregations = [ + "sum", + "mean", + "median", + "std", + "var", + "min", + "max", + "nunique", + "count", + ] + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[numeric_cols].agg(aggregations) + pd_result = scalars_pandas_df[numeric_cols].agg(aggregations) + + # Pandas may produce narrower numeric types, but bigframes always produces Float64 + pd_result = pd_result.astype("Float64") + + # Drop median, as it's an approximation. + bf_median = bf_result.loc["median", :] + bf_result = bf_result.drop(labels=["median"]) + pd_result = pd_result.drop(labels=["median"]) + + assert_dfs_equivalent(pd_result, bf_result, check_index_type=False) + + # Double-check that median is at least plausible. 
+ assert ( + (bf_result.loc["min", :] <= bf_median) & (bf_median <= bf_result.loc["max", :]) + ).all() + + +def test_dataframe_agg_int_multi_string(scalars_dfs): + numeric_cols = ["int64_col", "int64_too", "bool_col"] + aggregations = [ + "sum", + "nunique", + "count", + "size", + ] + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[numeric_cols].agg(aggregations).to_pandas() + pd_result = scalars_pandas_df[numeric_cols].agg(aggregations) + + for dtype in bf_result.dtypes: + assert dtype == "Int64" + + # Pandas may produce narrower numeric types + # Pandas has object index type + pd.testing.assert_frame_equal( + pd_result, bf_result, check_dtype=False, check_index_type=False + ) + + +def test_df_transpose(): + # Include some floats to ensure type coercion + values = [[0, 3.5, True], [1, 4.5, False], [2, 6.5, None]] + # Test complex case of both axes being multi-indices with non-unique elements + + columns: pandas.Index = pd.Index( + ["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow") + ) + columns_multi = pd.MultiIndex.from_arrays([columns, columns], names=["c1", "c2"]) + + index: pandas.Index = pd.Index( + ["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow") + ) + rows_multi = pd.MultiIndex.from_arrays([index, index], names=["r1", "r2"]) + + pd_df = pandas.DataFrame(values, index=rows_multi, columns=columns_multi) + bf_df = dataframe.DataFrame(values, index=rows_multi, columns=columns_multi) + + pd_result = pd_df.T + bf_result = bf_df.T.to_pandas() + + pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + + +def test_df_transpose_error(): + with pytest.raises(TypeError, match="Cannot coerce.*to a common type."): + dataframe.DataFrame([[1, "hello"], [2, "world"]]).transpose() + + +def test_df_transpose_repeated_uses_cache(): + bf_df = dataframe.DataFrame([[1, 2.5], [2, 3.5]]) + pd_df = pandas.DataFrame([[1, 2.5], [2, 3.5]]) + # Transposing many times so that the operation will fail from complexity if not using the cache + for i in range(10): + # Cache still works even with simple scalar binop + bf_df = bf_df.transpose() + i + pd_df = pd_df.transpose() + i + + pd.testing.assert_frame_equal( + pd_df, bf_df.to_pandas(), check_dtype=False, check_index_type=False + ) + + +def test_df_stack(scalars_dfs): + if pandas.__version__.startswith("1.") or pandas.__version__.startswith("2.0"): + pytest.skip("pandas <2.1 uses different stack implementation") + scalars_df, scalars_pandas_df = scalars_dfs + # To match bigquery dataframes + scalars_pandas_df = scalars_pandas_df.copy() + scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]") + # Can only stack identically-typed columns + columns = ["int64_col", "int64_too", "rowindex_2"] + + bf_result = scalars_df[columns].stack().to_pandas() + pd_result = scalars_pandas_df[columns].stack(future_stack=True) + + # Pandas produces NaN, where bq dataframes produces pd.NA + assert_series_equal(bf_result, pd_result, check_dtype=False) + + +def test_df_melt_default(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + # To match bigquery dataframes + scalars_pandas_df = scalars_pandas_df.copy() + scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]") + # Can only melt identically-typed columns + columns = ["int64_col", "int64_too", "rowindex_2"] + + bf_result = scalars_df[columns].melt().to_pandas() + pd_result = scalars_pandas_df[columns].melt() + + # Pandas produces int64 index, Bigframes produces Int64 (nullable) + pd.testing.assert_frame_equal( + bf_result, 
pd_result, + check_index_type=False, + check_dtype=False, + ) + + +def test_df_melt_parameterized(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + # To match bigquery dataframes + scalars_pandas_df = scalars_pandas_df.copy() + scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]") + # Can only stack identically-typed columns + + bf_result = scalars_df.melt( + var_name="alice", + value_name="bob", + id_vars=["string_col"], + value_vars=["int64_col", "int64_too"], + ).to_pandas() + pd_result = scalars_pandas_df.melt( + var_name="alice", + value_name="bob", + id_vars=["string_col"], + value_vars=["int64_col", "int64_too"], + ) + + # Pandas produces int64 index, Bigframes produces Int64 (nullable) + pd.testing.assert_frame_equal( + bf_result, pd_result, check_index_type=False, check_dtype=False + ) + + +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_df_unstack(scalars_dfs, ordered): + scalars_df, scalars_pandas_df = scalars_dfs + # To match bigquery dataframes + scalars_pandas_df = scalars_pandas_df.copy() + scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]") + # Can only stack identically-typed columns + columns = [ + "rowindex_2", + "int64_col", + "int64_too", + ] + + # unstack on mono-index produces series + bf_result = scalars_df[columns].unstack().to_pandas(ordered=ordered) + pd_result = scalars_pandas_df[columns].unstack() + + # Pandas produces NaN, where bq dataframes produces pd.NA + assert_series_equal( + bf_result, pd_result, check_dtype=False, ignore_order=not ordered + ) + + +def test_ipython_key_completions_with_drop(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_names = "string_col" + bf_dataframe = scalars_df.drop(columns=col_names) + pd_dataframe = scalars_pandas_df.drop(columns=col_names) + expected = pd_dataframe.columns.tolist() + + results = bf_dataframe._ipython_key_completions_() + + assert col_names not in results + assert results == expected + # _ipython_key_completions_ is called with square brackets + # so only column names are relevant with tab completion + assert "to_gbq" not in results + assert "merge" not in results + assert "drop" not in results + + +def test_ipython_key_completions_with_rename(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name_dict = {"string_col": "a_renamed_column"} + bf_dataframe = scalars_df.rename(columns=col_name_dict) + pd_dataframe = scalars_pandas_df.rename(columns=col_name_dict) + expected = pd_dataframe.columns.tolist() + + results = bf_dataframe._ipython_key_completions_() + + assert "string_col" not in results + assert "a_renamed_column" in results + assert results == expected + # _ipython_key_completions_ is called with square brackets + # so only column names are relevant with tab completion + assert "to_gbq" not in results + assert "merge" not in results + assert "drop" not in results + + +def test__dir__with_drop(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_names = "string_col" + bf_dataframe = scalars_df.drop(columns=col_names) + pd_dataframe = scalars_pandas_df.drop(columns=col_names) + expected = pd_dataframe.columns.tolist() + + results = dir(bf_dataframe) + + assert col_names not in results + assert frozenset(expected) <= frozenset(results) + # __dir__ is called with a '.' and displays all methods, columns names, etc. 
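+    # (Unlike _ipython_key_completions_, dir() is expected to include methods
+    # such as to_gbq/merge/drop alongside the column names, as asserted below.)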
+ assert "to_gbq" in results + assert "merge" in results + assert "drop" in results + + +def test__dir__with_rename(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + col_name_dict = {"string_col": "a_renamed_column"} + bf_dataframe = scalars_df.rename(columns=col_name_dict) + pd_dataframe = scalars_pandas_df.rename(columns=col_name_dict) + expected = pd_dataframe.columns.tolist() + + results = dir(bf_dataframe) + + assert "string_col" not in results + assert "a_renamed_column" in results + assert frozenset(expected) <= frozenset(results) + # __dir__ is called with a '.' and displays all methods, columns names, etc. + assert "to_gbq" in results + assert "merge" in results + assert "drop" in results + + +@pytest.mark.parametrize( + ("start", "stop", "step"), + [ + (0, 0, None), + (None, None, None), + (1, None, None), + (None, 4, None), + (None, None, 2), + (None, 50000000000, 1), + (5, 4, None), + (3, None, 2), + (1, 7, 2), + (1, 7, 50000000000), + ], +) +def test_iloc_slice(scalars_df_index, scalars_pandas_df_index, start, stop, step): + bf_result = scalars_df_index.iloc[start:stop:step].to_pandas() + pd_result = scalars_pandas_df_index.iloc[start:stop:step] + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_iloc_slice_zero_step(scalars_df_index): + with pytest.raises(ValueError): + scalars_df_index.iloc[0:0:0] + + +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_iloc_slice_nested(scalars_df_index, scalars_pandas_df_index, ordered): + bf_result = scalars_df_index.iloc[1:].iloc[1:].to_pandas(ordered=ordered) + pd_result = scalars_pandas_df_index.iloc[1:].iloc[1:] + + assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered) + + +@pytest.mark.parametrize( + "index", + [0, 5, -2, (2,)], +) +def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index): + bf_result = scalars_df_index.iloc[index] + pd_result = scalars_pandas_df_index.iloc[index] + + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + +@pytest.mark.parametrize( + "index", + [(2, 5), (5, 0), (0, 0)], +) +def test_iloc_tuple(scalars_df_index, scalars_pandas_df_index, index): + bf_result = scalars_df_index.iloc[index] + pd_result = scalars_pandas_df_index.iloc[index] + + assert bf_result == pd_result + + +@pytest.mark.parametrize( + "index", + [(slice(None), [1, 2, 3]), (slice(1, 7, 2), [2, 5, 3])], +) +def test_iloc_tuple_multi_columns(scalars_df_index, scalars_pandas_df_index, index): + bf_result = scalars_df_index.iloc[index].to_pandas() + pd_result = scalars_pandas_df_index.iloc[index] + + pd.testing.assert_frame_equal(bf_result, pd_result) + + +def test_iloc_tuple_multi_columns_single_row(scalars_df_index, scalars_pandas_df_index): + index = (2, [2, 1, 3, -4]) + bf_result = scalars_df_index.iloc[index] + pd_result = scalars_pandas_df_index.iloc[index] + pd.testing.assert_series_equal(bf_result, pd_result) + + +@pytest.mark.parametrize( + ("index", "error"), + [ + ((1, 1, 1), pd.errors.IndexingError), + (("asd", "asd", "asd"), pd.errors.IndexingError), + (("asd"), TypeError), + ], +) +def test_iloc_tuple_errors(scalars_df_index, scalars_pandas_df_index, index, error): + with pytest.raises(error): + scalars_df_index.iloc[index] + with pytest.raises(error): + scalars_pandas_df_index.iloc[index] + + +@pytest.mark.parametrize( + "index", + [(2, 5), (5, 0), (0, 0)], +) +def test_iat(scalars_df_index, scalars_pandas_df_index, index): + bf_result = scalars_df_index.iat[index] + pd_result = 
scalars_pandas_df_index.iat[index] + + assert bf_result == pd_result + + +@pytest.mark.parametrize( + ("index", "error"), + [ + (0, TypeError), + ("asd", ValueError), + ((1, 2, 3), TypeError), + (("asd", "asd"), ValueError), + ], +) +def test_iat_errors(scalars_df_index, scalars_pandas_df_index, index, error): + with pytest.raises(error): + scalars_pandas_df_index.iat[index] + with pytest.raises(error): + scalars_df_index.iat[index] + + +def test_iloc_single_integer_out_of_bound_error( + scalars_df_index, scalars_pandas_df_index +): + with pytest.raises(IndexError, match="single positional indexer is out-of-bounds"): + scalars_df_index.iloc[99] + + +def test_loc_bool_series(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.loc[scalars_df_index.bool_col].to_pandas() + pd_result = scalars_pandas_df_index.loc[scalars_pandas_df_index.bool_col] + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_loc_select_column(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.loc[:, "int64_col"].to_pandas() + pd_result = scalars_pandas_df_index.loc[:, "int64_col"] + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + +def test_loc_select_with_column_condition(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.loc[:, scalars_df_index.dtypes == "Int64"].to_pandas() + pd_result = scalars_pandas_df_index.loc[ + :, scalars_pandas_df_index.dtypes == "Int64" + ] + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_loc_select_with_column_condition_bf_series( + scalars_df_index, scalars_pandas_df_index +): + # (b/347072677) GEOGRAPH type doesn't support DISTINCT op + columns = [ + item for item in scalars_pandas_df_index.columns if item != "geography_col" + ] + scalars_df_index = scalars_df_index[columns] + scalars_pandas_df_index = scalars_pandas_df_index[columns] + + size_half = len(scalars_pandas_df_index) / 2 + bf_result = scalars_df_index.loc[ + :, scalars_df_index.nunique() > size_half + ].to_pandas() + pd_result = scalars_pandas_df_index.loc[ + :, scalars_pandas_df_index.nunique() > size_half + ] + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_loc_single_index_with_duplicate(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.set_index("string_col", drop=False) + scalars_pandas_df_index = scalars_pandas_df_index.set_index( + "string_col", drop=False + ) + index = "Hello, World!" + bf_result = scalars_df_index.loc[index] + pd_result = scalars_pandas_df_index.loc[index] + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_loc_single_index_no_duplicate(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.set_index("int64_too", drop=False) + scalars_pandas_df_index = scalars_pandas_df_index.set_index("int64_too", drop=False) + index = -2345 + bf_result = scalars_df_index.loc[index] + pd_result = scalars_pandas_df_index.loc[index] + pd.testing.assert_series_equal( + bf_result, + pd_result, + ) + + +def test_at_with_duplicate(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.set_index("string_col", drop=False) + scalars_pandas_df_index = scalars_pandas_df_index.set_index( + "string_col", drop=False + ) + index = "Hello, World!" 
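+    # With a non-unique index, `.at` returns every matching row as a Series
+    # rather than a scalar, hence the Series comparison below.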
+ bf_result = scalars_df_index.at[index, "int64_too"] + pd_result = scalars_pandas_df_index.at[index, "int64_too"] + pd.testing.assert_series_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_at_no_duplicate(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.set_index("int64_too", drop=False) + scalars_pandas_df_index = scalars_pandas_df_index.set_index("int64_too", drop=False) + index = -2345 + bf_result = scalars_df_index.at[index, "string_col"] + pd_result = scalars_pandas_df_index.at[index, "string_col"] + assert bf_result == pd_result + + +def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + bf_df.loc[bf_df["int64_too"] == 0, "new_col"] = 99 + pd_df.loc[pd_df["int64_too"] == 0, "new_col"] = 99 + + # pandas uses float64 instead + pd_df["new_col"] = pd_df["new_col"].astype("Float64") + + pd.testing.assert_frame_equal( + bf_df.to_pandas(), + pd_df, + ) + + +@pytest.mark.parametrize( + ("col", "value"), + [ + ("string_col", "hello"), + ("int64_col", 3), + ("float64_col", 3.5), + ], +) +def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs, col, value): + if pd.__version__.startswith("1."): + pytest.skip("this loc overload not supported in pandas 1.x.") + + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + bf_df.loc[bf_df["int64_too"] == 1, col] = value + pd_df.loc[pd_df["int64_too"] == 1, col] = value + + pd.testing.assert_frame_equal( + bf_df.to_pandas(), + pd_df, + ) + + +def test_loc_setitem_bool_series_scalar_error(scalars_dfs): + if pd.__version__.startswith("1."): + pytest.skip("this loc overload not supported in pandas 1.x.") + + scalars_df, scalars_pandas_df = scalars_dfs + bf_df = scalars_df.copy() + pd_df = scalars_pandas_df.copy() + + with pytest.raises(Exception): + bf_df.loc[bf_df["int64_too"] == 1, "string_col"] = 99 + with pytest.raises(Exception): + pd_df.loc[pd_df["int64_too"] == 1, "string_col"] = 99 + + +@pytest.mark.parametrize( + ("col", "op"), + [ + # Int aggregates + pytest.param("int64_col", lambda x: x.sum(), id="int-sum"), + pytest.param("int64_col", lambda x: x.min(), id="int-min"), + pytest.param("int64_col", lambda x: x.max(), id="int-max"), + pytest.param("int64_col", lambda x: x.count(), id="int-count"), + pytest.param("int64_col", lambda x: x.nunique(), id="int-nunique"), + # Float aggregates + pytest.param("float64_col", lambda x: x.count(), id="float-count"), + pytest.param("float64_col", lambda x: x.nunique(), id="float-nunique"), + # Bool aggregates + pytest.param("bool_col", lambda x: x.sum(), id="bool-sum"), + pytest.param("bool_col", lambda x: x.count(), id="bool-count"), + pytest.param("bool_col", lambda x: x.nunique(), id="bool-nunique"), + # String aggregates + pytest.param("string_col", lambda x: x.count(), id="string-count"), + pytest.param("string_col", lambda x: x.nunique(), id="string-nunique"), + ], +) +def test_dataframe_aggregate_int(scalars_df_index, scalars_pandas_df_index, col, op): + bf_result = op(scalars_df_index[[col]]).to_pandas() + pd_result = op(scalars_pandas_df_index[[col]]) + + # Check dtype separately + assert bf_result.dtype == "Int64" + # Is otherwise "object" dtype + pd_result.index = pd_result.index.astype("string[pyarrow]") + # Pandas may produce narrower numeric types + assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) + + +@pytest.mark.parametrize( + 
("col", "op"), + [ + pytest.param("bool_col", lambda x: x.min(), id="bool-min"), + pytest.param("bool_col", lambda x: x.max(), id="bool-max"), + ], +) +def test_dataframe_aggregate_bool(scalars_df_index, scalars_pandas_df_index, col, op): + bf_result = op(scalars_df_index[[col]]).to_pandas() + pd_result = op(scalars_pandas_df_index[[col]]) + + # Check dtype separately + assert bf_result.dtype == "boolean" + + # Pandas may produce narrower numeric types + # Pandas has object index type + pd_result.index = pd_result.index.astype("string[pyarrow]") + assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False) + + +@pytest.mark.parametrize( + ("op", "bf_dtype"), + [ + (lambda x: x.sum(numeric_only=True), "Float64"), + (lambda x: x.mean(numeric_only=True), "Float64"), + (lambda x: x.min(numeric_only=True), "Float64"), + (lambda x: x.max(numeric_only=True), "Float64"), + (lambda x: x.std(numeric_only=True), "Float64"), + (lambda x: x.var(numeric_only=True), "Float64"), + (lambda x: x.count(numeric_only=False), "Int64"), + (lambda x: x.nunique(), "Int64"), + ], + ids=["sum", "mean", "min", "max", "std", "var", "count", "nunique"], +) +def test_dataframe_aggregates(scalars_dfs, op, bf_dtype): + scalars_df_index, scalars_pandas_df_index = scalars_dfs + col_names = ["int64_too", "float64_col", "string_col", "int64_col", "bool_col"] + bf_series = op(scalars_df_index[col_names]) + bf_result = bf_series + pd_result = op(scalars_pandas_df_index[col_names]) + + # Check dtype separately + assert bf_result.dtype == bf_dtype + + # Pandas may produce narrower numeric types, but bigframes always produces Float64 + # Pandas has object index type + pd_result.index = pd_result.index.astype("string[pyarrow]") + assert_series_equivalent( + pd_result, + bf_result, + check_dtype=False, + check_index_type=False, + ) + + +@pytest.mark.parametrize( + ("op"), + [ + (lambda x: x.sum(axis=1, numeric_only=True)), + (lambda x: x.mean(axis=1, numeric_only=True)), + (lambda x: x.min(axis=1, numeric_only=True)), + (lambda x: x.max(axis=1, numeric_only=True)), + (lambda x: x.std(axis=1, numeric_only=True)), + (lambda x: x.var(axis=1, numeric_only=True)), + ], + ids=["sum", "mean", "min", "max", "std", "var"], +) +def test_dataframe_aggregates_axis_1(scalars_df_index, scalars_pandas_df_index, op): + col_names = ["int64_too", "int64_col", "float64_col", "bool_col", "string_col"] + bf_result = op(scalars_df_index[col_names]).to_pandas() + pd_result = op(scalars_pandas_df_index[col_names]) + + # Pandas may produce narrower numeric types, but bigframes always produces Float64 + pd_result = pd_result.astype("Float64") + # Pandas has object index type + pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + + +@pytest.mark.parametrize( + ("op"), + [ + (lambda x: x.all(bool_only=True)), + (lambda x: x.any(bool_only=True)), + (lambda x: x.all(axis=1, bool_only=True)), + (lambda x: x.any(axis=1, bool_only=True)), + ], + ids=["all_axis0", "any_axis0", "all_axis1", "any_axis1"], +) +def test_dataframe_bool_aggregates(scalars_df_index, scalars_pandas_df_index, op): + # Pandas will drop nullable 'boolean' dtype so we convert first to bool, then cast back later + scalars_df_index = scalars_df_index.assign( + bool_col=scalars_df_index.bool_col.fillna(False) + ) + scalars_pandas_df_index = scalars_pandas_df_index.assign( + bool_col=scalars_pandas_df_index.bool_col.fillna(False).astype("bool") + ) + bf_series = op(scalars_df_index) + pd_series = op(scalars_pandas_df_index).astype("boolean") + 
bf_result = bf_series.to_pandas() + + pd_series.index = pd_series.index.astype(bf_result.index.dtype) + pd.testing.assert_series_equal(pd_series, bf_result, check_index_type=False) + + +def test_dataframe_prod(scalars_df_index, scalars_pandas_df_index): + col_names = ["int64_too", "float64_col"] + bf_series = scalars_df_index[col_names].prod() + pd_series = scalars_pandas_df_index[col_names].prod() + bf_result = bf_series.to_pandas() + + # Pandas may produce narrower numeric types, but bigframes always produces Float64 + pd_series = pd_series.astype("Float64") + # Pandas has object index type + pd.testing.assert_series_equal(pd_series, bf_result, check_index_type=False) + + +def test_df_skew_too_few_values(scalars_dfs): + columns = ["float64_col", "int64_col"] + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[columns].head(2).skew().to_pandas() + pd_result = scalars_pandas_df[columns].head(2).skew() + + # Pandas may produce narrower numeric types, but bigframes always produces Float64 + pd_result = pd_result.astype("Float64") + + pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + + +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_df_skew(scalars_dfs, ordered): + columns = ["float64_col", "int64_col"] + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[columns].skew().to_pandas(ordered=ordered) + pd_result = scalars_pandas_df[columns].skew() + + # Pandas may produce narrower numeric types, but bigframes always produces Float64 + pd_result = pd_result.astype("Float64") + + assert_series_equal( + pd_result, bf_result, check_index_type=False, ignore_order=not ordered + ) + + +def test_df_kurt_too_few_values(scalars_dfs): + columns = ["float64_col", "int64_col"] + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[columns].head(2).kurt().to_pandas() + pd_result = scalars_pandas_df[columns].head(2).kurt() + + # Pandas may produce narrower numeric types, but bigframes always produces Float64 + pd_result = pd_result.astype("Float64") + + pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + + +def test_df_kurt(scalars_dfs): + columns = ["float64_col", "int64_col"] + scalars_df, scalars_pandas_df = scalars_dfs + bf_result = scalars_df[columns].kurt().to_pandas() + pd_result = scalars_pandas_df[columns].kurt() + + # Pandas may produce narrower numeric types, but bigframes always produces Float64 + pd_result = pd_result.astype("Float64") + + pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + + +def test_sample_raises_value_error(scalars_dfs): + scalars_df, _ = scalars_dfs + with pytest.raises( + ValueError, match="Only one of 'n' or 'frac' parameter can be specified." 
+    ):
+        scalars_df.sample(frac=0.5, n=4)
+
+
+@pytest.mark.parametrize(
+    ("axis",),
+    [
+        (None,),
+        (0,),
+        (1,),
+    ],
+)
+def test_df_add_prefix(scalars_df_index, scalars_pandas_df_index, axis):
+    if pd.__version__.startswith("1."):
+        pytest.skip("add_prefix axis parameter not supported in pandas 1.x.")
+    bf_result = scalars_df_index.add_prefix("prefix_", axis).to_pandas()
+
+    pd_result = scalars_pandas_df_index.add_prefix("prefix_", axis)
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+        check_index_type=False,
+    )
+
+
+@pytest.mark.parametrize(
+    ("axis",),
+    [
+        (0,),
+        (1,),
+    ],
+)
+def test_df_add_suffix(scalars_df_index, scalars_pandas_df_index, axis):
+    if pd.__version__.startswith("1."):
+        pytest.skip("add_suffix axis parameter not supported in pandas 1.x.")
+    bf_result = scalars_df_index.add_suffix("_suffix", axis).to_pandas()
+
+    pd_result = scalars_pandas_df_index.add_suffix("_suffix", axis)
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+        check_index_type=False,
+    )
+
+
+def test_df_astype_error_error(session):
+    input = pd.DataFrame(["hello", "world", "3.11", "4000"])
+    with pytest.raises(ValueError):
+        session.read_pandas(input).astype("Float64", errors="bad_value")
+
+
+def test_df_columns_filter_items(scalars_df_index, scalars_pandas_df_index):
+    if pd.__version__.startswith("2.0") or pd.__version__.startswith("1."):
+        pytest.skip("pandas filter items behavior different pre-2.1")
+    bf_result = scalars_df_index.filter(items=["string_col", "int64_col"]).to_pandas()
+
+    pd_result = scalars_pandas_df_index.filter(items=["string_col", "int64_col"])
+    # Ignore column ordering as pandas orders columns differently depending on version
+    pd.testing.assert_frame_equal(
+        bf_result.sort_index(axis=1),
+        pd_result.sort_index(axis=1),
+    )
+
+
+def test_df_columns_filter_like(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.filter(like="64_col").to_pandas()
+
+    pd_result = scalars_pandas_df_index.filter(like="64_col")
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_columns_filter_regex(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.filter(regex="^[^_]+$").to_pandas()
+
+    pd_result = scalars_pandas_df_index.filter(regex="^[^_]+$")
+
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_reindex_rows_list(scalars_dfs):
+    scalars_df_index, scalars_pandas_df_index = scalars_dfs
+    bf_result = scalars_df_index.reindex(index=[5, 1, 3, 99, 1])
+
+    pd_result = scalars_pandas_df_index.reindex(index=[5, 1, 3, 99, 1])
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+    assert_dfs_equivalent(
+        pd_result,
+        bf_result,
+    )
+
+
+def test_df_reindex_rows_index(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.reindex(
+        index=pd.Index([5, 1, 3, 99, 1], name="newname")
+    ).to_pandas()
+
+    pd_result = scalars_pandas_df_index.reindex(
+        index=pd.Index([5, 1, 3, 99, 1], name="newname")
+    )
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+ pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_reindex_nonunique(scalars_df_index): + with pytest.raises(ValueError): + # int64_too is non-unique + scalars_df_index.set_index("int64_too").reindex( + index=[5, 1, 3, 99, 1], validate=True + ) + + +def test_df_reindex_columns(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.reindex( + columns=["not_a_col", "int64_col", "int64_too"] + ).to_pandas() + + pd_result = scalars_pandas_df_index.reindex( + columns=["not_a_col", "int64_col", "int64_too"] + ) + + # Pandas uses float64 as default for newly created empty column, bf uses Float64 + pd_result.not_a_col = pd_result.not_a_col.astype(pandas.Float64Dtype()) + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_reindex_columns_with_same_order(scalars_df_index, scalars_pandas_df_index): + # First, make sure the two dataframes have the same columns in order. + columns = ["int64_col", "int64_too"] + bf = scalars_df_index[columns] + pd_df = scalars_pandas_df_index[columns] + + bf_result = bf.reindex(columns=columns).to_pandas() + pd_result = pd_df.reindex(columns=columns) + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_equals_identical(scalars_df_index, scalars_pandas_df_index): + unsupported = [ + "geography_col", + ] + scalars_df_index = scalars_df_index.drop(columns=unsupported) + scalars_pandas_df_index = scalars_pandas_df_index.drop(columns=unsupported) + + bf_result = scalars_df_index.equals(scalars_df_index) + pd_result = scalars_pandas_df_index.equals(scalars_pandas_df_index) + + assert pd_result == bf_result + + +def test_df_equals_series(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index[["int64_col"]].equals(scalars_df_index["int64_col"]) + pd_result = scalars_pandas_df_index[["int64_col"]].equals( + scalars_pandas_df_index["int64_col"] + ) + + assert pd_result == bf_result + + +def test_df_equals_different_dtype(scalars_df_index, scalars_pandas_df_index): + columns = ["int64_col", "int64_too"] + scalars_df_index = scalars_df_index[columns] + scalars_pandas_df_index = scalars_pandas_df_index[columns] + + bf_modified = scalars_df_index.copy() + bf_modified = bf_modified.astype("Float64") + + pd_modified = scalars_pandas_df_index.copy() + pd_modified = pd_modified.astype("Float64") + + bf_result = scalars_df_index.equals(bf_modified) + pd_result = scalars_pandas_df_index.equals(pd_modified) + + assert pd_result == bf_result + + +def test_df_equals_different_values(scalars_df_index, scalars_pandas_df_index): + columns = ["int64_col", "int64_too"] + scalars_df_index = scalars_df_index[columns] + scalars_pandas_df_index = scalars_pandas_df_index[columns] + + bf_modified = scalars_df_index.copy() + bf_modified["int64_col"] = bf_modified.int64_col + 1 + + pd_modified = scalars_pandas_df_index.copy() + pd_modified["int64_col"] = pd_modified.int64_col + 1 + + bf_result = scalars_df_index.equals(bf_modified) + pd_result = scalars_pandas_df_index.equals(pd_modified) + + assert pd_result == bf_result + + +def test_df_equals_extra_column(scalars_df_index, scalars_pandas_df_index): + columns = ["int64_col", "int64_too"] + more_columns = ["int64_col", "int64_too", "float64_col"] + + bf_result = scalars_df_index[columns].equals(scalars_df_index[more_columns]) + pd_result = scalars_pandas_df_index[columns].equals( + scalars_pandas_df_index[more_columns] + ) + + assert pd_result == bf_result + + +def 
test_df_reindex_like(scalars_df_index, scalars_pandas_df_index):
+    reindex_target_bf = scalars_df_index.reindex(
+        columns=["not_a_col", "int64_col", "int64_too"], index=[5, 1, 3, 99, 1]
+    )
+    bf_result = scalars_df_index.reindex_like(reindex_target_bf).to_pandas()
+
+    reindex_target_pd = scalars_pandas_df_index.reindex(
+        columns=["not_a_col", "int64_col", "int64_too"], index=[5, 1, 3, 99, 1]
+    )
+    pd_result = scalars_pandas_df_index.reindex_like(reindex_target_pd)
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+    # Pandas uses float64 as default for newly created empty column, bf uses Float64
+    pd_result.not_a_col = pd_result.not_a_col.astype(pandas.Float64Dtype())
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+def test_df_values(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.values
+
+    pd_result = scalars_pandas_df_index.values
+    # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe
+    pd.testing.assert_frame_equal(
+        pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False
+    )
+
+
+def test_df_to_numpy(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.to_numpy()
+
+    pd_result = scalars_pandas_df_index.to_numpy()
+    # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe
+    pd.testing.assert_frame_equal(
+        pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False
+    )
+
+
+def test_df___array__(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.__array__()
+
+    pd_result = scalars_pandas_df_index.__array__()
+    # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe
+    pd.testing.assert_frame_equal(
+        pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False
+    )
+
+
+def test_df_getattr_attribute_error_when_pandas_has(scalars_df_index):
+    # swapaxes is implemented in pandas but not in bigframes
+    with pytest.raises(AttributeError):
+        scalars_df_index.swapaxes()
+
+
+def test_df_getattr_attribute_error(scalars_df_index):
+    with pytest.raises(AttributeError):
+        scalars_df_index.not_a_method()
+
+
+def test_df_getattr_axes():
+    df = dataframe.DataFrame(
+        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
+    )
+    assert isinstance(df.index, bigframes.core.indexes.Index)
+    assert isinstance(df.columns, pandas.Index)
+    assert isinstance(df.my_column, series.Series)
+
+
+def test_df_setattr_index():
+    pd_df = pandas.DataFrame(
+        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
+    )
+    bf_df = dataframe.DataFrame(pd_df)
+
+    pd_df.index = pandas.Index([4, 5])
+    bf_df.index = [4, 5]
+
+    assert_pandas_df_equal(
+        pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
+    )
+
+
+def test_df_setattr_columns():
+    pd_df = pandas.DataFrame(
+        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
+    )
+    bf_df = dataframe.DataFrame(pd_df)
+
+    pd_df.columns = typing.cast(pandas.Index, pandas.Index([4, 5, 6]))
+
+    bf_df.columns = pandas.Index([4, 5, 6])
+
+    assert_pandas_df_equal(
+        pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
+    )
+
+
+def test_df_setattr_modify_column():
+    pd_df = pandas.DataFrame(
+        [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
+    )
+    bf_df = dataframe.DataFrame(pd_df)
+    pd_df.my_column = [4, 5]
+    bf_df.my_column = [4, 5]
+
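+    # As in pandas, attribute-style assignment only modifies a column that
+    # already exists; a brand-new column still needs item assignment, e.g.
+    # (illustrative) `bf_df["new_col"] = [4, 5]`.
+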
assert_pandas_df_equal( + pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False + ) + + +def test_loc_list_string_index(scalars_df_index, scalars_pandas_df_index): + index_list = scalars_pandas_df_index.string_col.iloc[[0, 1, 1, 5]].values + + scalars_df_index = scalars_df_index.set_index("string_col") + scalars_pandas_df_index = scalars_pandas_df_index.set_index("string_col") + + bf_result = scalars_df_index.loc[index_list].to_pandas() + pd_result = scalars_pandas_df_index.loc[index_list] + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_loc_list_integer_index(scalars_df_index, scalars_pandas_df_index): + index_list = [3, 2, 1, 3, 2, 1] + + bf_result = scalars_df_index.loc[index_list] + pd_result = scalars_pandas_df_index.loc[index_list] + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_loc_list_multiindex(scalars_dfs): + scalars_df_index, scalars_pandas_df_index = scalars_dfs + scalars_df_multiindex = scalars_df_index.set_index(["string_col", "int64_col"]) + scalars_pandas_df_multiindex = scalars_pandas_df_index.set_index( + ["string_col", "int64_col"] + ) + index_list = [("Hello, World!", -234892), ("Hello, World!", 123456789)] + + bf_result = scalars_df_multiindex.loc[index_list] + pd_result = scalars_pandas_df_multiindex.loc[index_list] + + assert_dfs_equivalent( + pd_result, + bf_result, + ) + + +@pytest.mark.parametrize( + "index_list", + [ + [0, 1, 2, 3, 4, 4], + [0, 0, 0, 5, 4, 7, -2, -5, 3], + [-1, -2, -3, -4, -5, -5], + ], +) +def test_iloc_list(scalars_df_index, scalars_pandas_df_index, index_list): + bf_result = scalars_df_index.iloc[index_list] + pd_result = scalars_pandas_df_index.iloc[index_list] + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_iloc_list_multiindex(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + scalars_df = scalars_df.copy() + scalars_pandas_df = scalars_pandas_df.copy() + scalars_df = scalars_df.set_index(["bytes_col", "numeric_col"]) + scalars_pandas_df = scalars_pandas_df.set_index(["bytes_col", "numeric_col"]) + + index_list = [0, 0, 0, 5, 4, 7] + + bf_result = scalars_df.iloc[index_list] + pd_result = scalars_pandas_df.iloc[index_list] + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index): + + index_list: List[int] = [] + + bf_result = scalars_df_index.iloc[index_list] + pd_result = scalars_pandas_df_index.iloc[index_list] + + bf_result = bf_result.to_pandas() + assert bf_result.shape == pd_result.shape # types are known to be different + + +def test_rename_axis(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.rename_axis("newindexname") + pd_result = scalars_pandas_df_index.rename_axis("newindexname") + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_rename_axis_nonstring(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.rename_axis((4,)) + pd_result = scalars_pandas_df_index.rename_axis((4,)) + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_loc_bf_series_string_index(scalars_df_index, scalars_pandas_df_index): + pd_string_series = scalars_pandas_df_index.string_col.iloc[[0, 5, 1, 1, 5]] + bf_string_series = scalars_df_index.string_col.iloc[[0, 5, 1, 1, 5]] + + scalars_df_index = scalars_df_index.set_index("string_col") + scalars_pandas_df_index = 
scalars_pandas_df_index.set_index("string_col") + + bf_result = scalars_df_index.loc[bf_string_series] + pd_result = scalars_pandas_df_index.loc[pd_string_series] + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_loc_bf_series_multiindex(scalars_df_index, scalars_pandas_df_index): + pd_string_series = scalars_pandas_df_index.string_col.iloc[[0, 5, 1, 1, 5]] + bf_string_series = scalars_df_index.string_col.iloc[[0, 5, 1, 1, 5]] + + scalars_df_multiindex = scalars_df_index.set_index(["string_col", "int64_col"]) + scalars_pandas_df_multiindex = scalars_pandas_df_index.set_index( + ["string_col", "int64_col"] + ) + + bf_result = scalars_df_multiindex.loc[bf_string_series] + pd_result = scalars_pandas_df_multiindex.loc[pd_string_series] + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_loc_bf_index_integer_index(scalars_df_index, scalars_pandas_df_index): + pd_index = scalars_pandas_df_index.iloc[[0, 5, 1, 1, 5]].index + bf_index = scalars_df_index.iloc[[0, 5, 1, 1, 5]].index + + bf_result = scalars_df_index.loc[bf_index] + pd_result = scalars_pandas_df_index.loc[pd_index] + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_loc_bf_index_integer_index_renamed_col( + scalars_df_index, scalars_pandas_df_index +): + scalars_df_index = scalars_df_index.rename(columns={"int64_col": "rename"}) + scalars_pandas_df_index = scalars_pandas_df_index.rename( + columns={"int64_col": "rename"} + ) + + pd_index = scalars_pandas_df_index.iloc[[0, 5, 1, 1, 5]].index + bf_index = scalars_df_index.iloc[[0, 5, 1, 1, 5]].index + + bf_result = scalars_df_index.loc[bf_index] + pd_result = scalars_pandas_df_index.loc[pd_index] + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + +@pytest.mark.parametrize( + ("subset"), + [ + None, + "bool_col", + ["bool_col", "int64_too"], + ], +) +@pytest.mark.parametrize( + ("keep",), + [ + (False,), + ], +) +def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, subset): + columns = ["bool_col", "int64_too", "int64_col"] + bf_df = scalars_df_index[columns].drop_duplicates(subset, keep=keep).to_pandas() + pd_df = scalars_pandas_df_index[columns].drop_duplicates(subset, keep=keep) + pd.testing.assert_frame_equal( + pd_df, + bf_df, + ) + + +@pytest.mark.parametrize( + ("subset"), + [ + None, + ["bool_col"], + ], +) +@pytest.mark.parametrize( + ("keep",), + [ + (False,), + ], +) +def test_df_duplicated(scalars_df_index, scalars_pandas_df_index, keep, subset): + columns = ["bool_col", "int64_too", "int64_col"] + bf_series = scalars_df_index[columns].duplicated(subset, keep=keep).to_pandas() + pd_series = scalars_pandas_df_index[columns].duplicated(subset, keep=keep) + pd.testing.assert_series_equal(pd_series, bf_series, check_dtype=False) + + +def test_df_from_dict_columns_orient(): + data = {"a": [1, 2], "b": [3.3, 2.4]} + bf_result = dataframe.DataFrame.from_dict(data, orient="columns").to_pandas() + pd_result = pd.DataFrame.from_dict(data, orient="columns") + assert_pandas_df_equal( + pd_result, bf_result, check_dtype=False, check_index_type=False + ) + + +def test_df_from_dict_index_orient(): + data = {"a": [1, 2], "b": [3.3, 2.4]} + bf_result = dataframe.DataFrame.from_dict( + data, orient="index", columns=["col1", "col2"] + ).to_pandas() + pd_result = pd.DataFrame.from_dict(data, orient="index", columns=["col1", "col2"]) + assert_pandas_df_equal( + pd_result, bf_result, check_dtype=False, check_index_type=False + 
)
+
+
+def test_df_from_dict_tight_orient():
+    data = {
+        "index": [("i1", "i2"), ("i3", "i4")],
+        "columns": ["col1", "col2"],
+        "data": [[1, 2.6], [3, 4.5]],
+        "index_names": ["in1", "in2"],
+        "column_names": ["column_axis"],
+    }
+
+    bf_result = dataframe.DataFrame.from_dict(data, orient="tight").to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="tight")
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_records():
+    records = ((1, "a"), (2.5, "b"), (3.3, "c"), (4.9, "d"))
+
+    bf_result = dataframe.DataFrame.from_records(
+        records, columns=["c1", "c2"]
+    ).to_pandas()
+    pd_result = pd.DataFrame.from_records(records, columns=["c1", "c2"])
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_to_dict(scalars_df_index, scalars_pandas_df_index):
+    unsupported = ["numeric_col"]  # formatted differently
+    bf_result = scalars_df_index.drop(columns=unsupported).to_dict()
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_dict()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_json_local_str(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.to_json()
+    # default_handler for arrow types that have no default conversion
+    pd_result = scalars_pandas_df_index.to_json(default_handler=str)
+
+    assert bf_result == pd_result
+
+
+def test_df_to_json_local_file(scalars_df_index, scalars_pandas_df_index):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
+        scalars_df_index.to_json(bf_result_file, orient="table")
+        # default_handler for arrow types that have no default conversion
+        scalars_pandas_df_index.to_json(
+            pd_result_file, orient="table", default_handler=str
+        )
+
+        bf_result = bf_result_file.read()
+        pd_result = pd_result_file.read()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_csv_local_str(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.to_csv()
+    pd_result = scalars_pandas_df_index.to_csv()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_csv_local_file(scalars_df_index, scalars_pandas_df_index):
+    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
+        scalars_df_index.to_csv(bf_result_file)
+        scalars_pandas_df_index.to_csv(pd_result_file)
+
+        bf_result = bf_result_file.read()
+        pd_result = pd_result_file.read()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_parquet_local_bytes(scalars_df_index, scalars_pandas_df_index):
+    # GEOGRAPHY not supported in parquet export.
+    unsupported = ["geography_col"]
+
+    bf_result = scalars_df_index.drop(columns=unsupported).to_parquet()
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_parquet()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_parquet_local_file(scalars_df_index, scalars_pandas_df_index):
+    # GEOGRAPHY not supported in parquet export.
+    unsupported = ["geography_col"]
+    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
+        scalars_df_index.drop(columns=unsupported).to_parquet(bf_result_file)
+        scalars_pandas_df_index.drop(columns=unsupported).to_parquet(pd_result_file)
+
+        bf_result = bf_result_file.read()
+        pd_result = pd_result_file.read()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_records(scalars_df_index, scalars_pandas_df_index):
+    unsupported = ["numeric_col"]
+    bf_result = scalars_df_index.drop(columns=unsupported).to_records()
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_records()
+
+    for bfi, pdi in zip(bf_result, pd_result):
+        for bfj, pdj in zip(bfi, pdi):
+            assert pd.isna(bfj) and pd.isna(pdj) or bfj == pdj
+
+
+def test_df_to_string(scalars_df_index, scalars_pandas_df_index):
+    unsupported = ["numeric_col"]  # formatted differently
+
+    bf_result = scalars_df_index.drop(columns=unsupported).to_string()
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_string()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_html(scalars_df_index, scalars_pandas_df_index):
+    unsupported = ["numeric_col"]  # formatted differently
+
+    bf_result = scalars_df_index.drop(columns=unsupported).to_html()
+    pd_result = scalars_pandas_df_index.drop(columns=unsupported).to_html()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_markdown(scalars_df_index, scalars_pandas_df_index):
+    # Nulls have bug from tabulate https://github.com/astanin/python-tabulate/issues/231
+    bf_result = scalars_df_index.dropna().to_markdown()
+    pd_result = scalars_pandas_df_index.dropna().to_markdown()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_pickle(scalars_df_index, scalars_pandas_df_index):
+    with tempfile.TemporaryFile() as bf_result_file, tempfile.TemporaryFile() as pd_result_file:
+        scalars_df_index.to_pickle(bf_result_file)
+        scalars_pandas_df_index.to_pickle(pd_result_file)
+        bf_result = bf_result_file.read()
+        pd_result = pd_result_file.read()
+
+    assert bf_result == pd_result
+
+
+def test_df_to_orc(scalars_df_index, scalars_pandas_df_index):
+    unsupported = [
+        "numeric_col",
+        "bytes_col",
+        "date_col",
+        "datetime_col",
+        "time_col",
+        "timestamp_col",
+        "geography_col",
+    ]
+
+    bf_result_file = tempfile.TemporaryFile()
+    pd_result_file = tempfile.TemporaryFile()
+    scalars_df_index.drop(columns=unsupported).to_orc(bf_result_file)
+    scalars_pandas_df_index.drop(columns=unsupported).reset_index().to_orc(
+        pd_result_file
+    )
+    bf_result = bf_result_file.read()
+    pd_result = pd_result_file.read()
+
+    assert bf_result == pd_result
+
+
+@pytest.mark.parametrize(
+    ("expr",),
+    [
+        ("new_col = int64_col + int64_too",),
+        ("new_col = (rowindex > 3) | bool_col",),
+        ("int64_too = bool_col\nnew_col2 = rowindex",),
+    ],
+)
+def test_df_eval(scalars_dfs, expr):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = scalars_df.eval(expr).to_pandas()
+    pd_result = scalars_pandas_df.eval(expr)
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+@pytest.mark.parametrize(
+    ("expr",),
+    [
+        ("int64_col > int64_too",),
+        ("bool_col",),
+        ("((int64_col - int64_too) % @local_var) == 0",),
+    ],
+)
+def test_df_query(scalars_dfs, expr):
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    # local_var is referenced in expressions
+    local_var = 3  # NOQA
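+    # In query expressions, `@local_var` resolves to the Python variable of
+    # that name from the enclosing scope (as in pandas.DataFrame.query), so
+    # `((int64_col - int64_too) % @local_var) == 0` tests divisibility by 3.
+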
scalars_df, scalars_pandas_df = scalars_dfs + + bf_result = scalars_df.query(expr).to_pandas() + pd_result = scalars_pandas_df.query(expr) + + pd.testing.assert_frame_equal(bf_result, pd_result) + + +@pytest.mark.parametrize( + ("subset", "normalize", "ascending", "dropna"), + [ + (None, False, False, False), + (None, True, True, True), + ("bool_col", True, False, True), + ], +) +def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna): + if pd.__version__.startswith("1."): + pytest.skip("pandas 1.x produces different column labels.") + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result = ( + scalars_df[["string_col", "bool_col"]] + .value_counts(subset, normalize=normalize, ascending=ascending, dropna=dropna) + .to_pandas() + ) + pd_result = scalars_pandas_df[["string_col", "bool_col"]].value_counts( + subset, normalize=normalize, ascending=ascending, dropna=dropna + ) + + pd.testing.assert_series_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + +def test_df_bool_interpretation_error(scalars_df_index): + with pytest.raises(ValueError): + True if scalars_df_index else False + + +def test_assign_after_binop_row_joins(): + pd_df = pd.DataFrame( + { + "idx1": [1, 1, 1, 1, 2, 2, 2, 2], + "idx2": [10, 10, 20, 20, 10, 10, 20, 20], + "metric1": [10, 14, 2, 13, 6, 2, 9, 5], + "metric2": [25, -3, 8, 2, -1, 0, 0, -4], + }, + dtype=pd.Int64Dtype(), + ).set_index(["idx1", "idx2"]) + bf_df = dataframe.DataFrame(pd_df) + + # Expect implicit joiner to be used, preserving input cardinality rather than getting relational join + bf_df["metric_diff"] = bf_df.metric1 - bf_df.metric2 + pd_df["metric_diff"] = pd_df.metric1 - pd_df.metric2 + + assert_pandas_df_equal(bf_df.to_pandas(), pd_df) + + +def test_df_dot_inline(session): + df1 = pd.DataFrame([[1, 2, 3], [2, 5, 7]]) + df2 = pd.DataFrame([[2, 4, 8], [1, 5, 10], [3, 6, 9]]) + + bf1 = session.read_pandas(df1) + bf2 = session.read_pandas(df2) + bf_result = bf1.dot(bf2).to_pandas() + pd_result = df1.dot(df2) + + # Patch pandas dtypes for testing parity + # Pandas uses int64 instead of Int64 (nullable) dtype. + for name in pd_result.columns: + pd_result[name] = pd_result[name].astype(pd.Int64Dtype()) + pd_result.index = pd_result.index.astype(pd.Int64Dtype()) + + pd.testing.assert_frame_equal( + bf_result, + pd_result, + ) + + +def test_df_dot_series_inline(): + left = [[1, 2, 3], [2, 5, 7]] + right = [2, 1, 3] + + bf1 = dataframe.DataFrame(left) + bf2 = series.Series(right) + bf_result = bf1.dot(bf2).to_pandas() + + df1 = pd.DataFrame(left) + df2 = pd.Series(right) + pd_result = df1.dot(df2) + + # Patch pandas dtypes for testing parity + # Pandas result is int64 instead of Int64 (nullable) dtype. 
+    pd_result = pd_result.astype(pd.Int64Dtype())
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    pd.testing.assert_series_equal(
+        bf_result,
+        pd_result,
+    )
+
+
+@pytest.mark.parametrize(
+    ("col_names", "ignore_index"),
+    [
+        pytest.param(["A"], False, id="one_array_false"),
+        pytest.param(["A"], True, id="one_array_true"),
+        pytest.param(["B"], False, id="one_float_false"),
+        pytest.param(["B"], True, id="one_float_true"),
+        pytest.param(["A", "C"], False, id="two_arrays_false"),
+        pytest.param(["A", "C"], True, id="two_arrays_true"),
+    ],
+)
+def test_dataframe_explode(col_names, ignore_index, session):
+    data = {
+        "A": [[0, 1, 2], [], [3, 4]],
+        "B": 3,
+        "C": [["a", "b", "c"], np.nan, ["d", "e"]],
+    }
+
+    df = bpd.DataFrame(data, session=session)
+    pd_df = df.to_pandas()
+    pd_result = pd_df.explode(col_names, ignore_index=ignore_index)
+    bf_result = df.explode(col_names, ignore_index=ignore_index)
+
+    # Materializing with to_pandas() should require at most a single query execution.
+    bf_materialized = bf_result.to_pandas()
+
+    pd.testing.assert_frame_equal(
+        bf_materialized,
+        pd_result,
+        check_index_type=False,
+        check_dtype=False,
+    )
+
+
+@pytest.mark.parametrize(
+    ("ignore_index", "ordered"),
+    [
+        pytest.param(True, True, id="include_index_ordered"),
+        pytest.param(True, False, id="include_index_unordered"),
+        pytest.param(False, True, id="ignore_index_ordered"),
+    ],
+)
+def test_dataframe_explode_preserve_order(session, ignore_index, ordered):
+    data = {
+        "a": [np.random.randint(0, 10, 10) for _ in range(10)],
+        "b": [np.random.randint(0, 10, 10) for _ in range(10)],
+    }
+    df = bpd.DataFrame(data)
+    pd_df = pd.DataFrame(data)
+
+    res = df.explode(["a", "b"], ignore_index=ignore_index).to_pandas(ordered=ordered)
+    pd_res = pd_df.explode(["a", "b"], ignore_index=ignore_index).astype(
+        pd.Int64Dtype()
+    )
+    pd.testing.assert_frame_equal(
+        res if ordered else res.sort_index(),
+        pd_res,
+        check_index_type=False,
+    )
+
+
+@pytest.mark.parametrize(
+    ("col_names"),
+    [
+        pytest.param([], id="empty", marks=pytest.mark.xfail(raises=ValueError)),
+        pytest.param(
+            ["A", "A"], id="duplicate", marks=pytest.mark.xfail(raises=ValueError)
+        ),
+        pytest.param("unknown", id="unknown", marks=pytest.mark.xfail(raises=KeyError)),
+    ],
+)
+def test_dataframe_explode_xfail(col_names):
+    df = bpd.DataFrame({"A": [[0, 1, 2], [], [3, 4]]})
+    df.explode(col_names)
diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py
index 613a929c04..92a58b3dc6 100644
--- a/third_party/bigframes_vendored/geopandas/geoseries.py
+++ b/third_party/bigframes_vendored/geopandas/geoseries.py
@@ -483,3 +483,25 @@ def intersection(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignor
         each aligned geometry with other.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    @property
+    def is_closed(self: GeoSeries) -> bigframes.series.Series:
+        """
+        [Not Implemented] Use ``bigframes.bigquery.st_isclosed(series)``
+        instead to return a boolean indicating if a shape is closed.
+
+        In GeoPandas, this returns a Series of booleans with value True if a
+        LineString's or LinearRing's first and last points are equal.
+
+        Returns False for any other geometry type.
+
+        Returns:
+            bigframes.pandas.Series:
+                Series of booleans.
+
+        Raises:
+            NotImplementedError:
+                GeoSeries.is_closed is not supported. Use
+                ``bigframes.bigquery.st_isclosed(series)`` instead.
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/ibis/expr/types/core.py b/third_party/bigframes_vendored/ibis/expr/types/core.py index 9685e4ddca..5704dc993a 100644 --- a/third_party/bigframes_vendored/ibis/expr/types/core.py +++ b/third_party/bigframes_vendored/ibis/expr/types/core.py @@ -19,6 +19,7 @@ import bigframes_vendored.ibis.expr.operations as ops from bigframes_vendored.ibis.expr.types.pretty import to_rich from bigframes_vendored.ibis.util import experimental +import pandas as pd from public import public from rich.console import Console from rich.jupyter import JupyterMixin @@ -34,7 +35,6 @@ EdgeAttributeGetter, NodeAttributeGetter, ) - import pandas as pd import polars as pl import pyarrow as pa import torch @@ -744,9 +744,9 @@ def _binop(op_class: type[ops.Binary], left: ir.Value, right: ir.Value) -> ir.Va def _is_null_literal(value: Any) -> bool: """Detect whether `value` will be treated by ibis as a null literal.""" - if value is None: - return True if isinstance(value, Expr): op = value.op() return isinstance(op, ops.Literal) and op.value is None + if pd.isna(value): + return True return False diff --git a/third_party/bigframes_vendored/pandas/core/computation/eval.py b/third_party/bigframes_vendored/pandas/core/computation/eval.py index 56d60174a6..d3d11a9c2a 100644 --- a/third_party/bigframes_vendored/pandas/core/computation/eval.py +++ b/third_party/bigframes_vendored/pandas/core/computation/eval.py @@ -171,6 +171,7 @@ def eval( with plain ol' Python evaluation. **Examples:** + >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index c1b5b5a86b..6c927a5c26 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -4253,6 +4253,7 @@ def corrwith( correlations. **Examples:** + >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None diff --git a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py index 469f35f181..dfb1cf9efc 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py @@ -204,6 +204,7 @@ def isocalendar(self): Calculate year, week, and day according to the ISO 8601 standard. **Examples:** + >>> import pandas as pd >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py b/third_party/bigframes_vendored/pandas/core/indexes/base.py index 7df1c7a9de..6a6bb96897 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -1087,6 +1087,25 @@ def unique(self, level: Hashable | int | None = None): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def item(self, *args, **kwargs): + """Return the first element of the underlying data as a Python scalar. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([1], index=['a']) + >>> s.index.item() + 'a' + + Returns: + scalar: The first element of Index. + + Raises: + ValueError: If the data is not length = 1. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def to_numpy(self, dtype, *, allow_large_results=None): """ A NumPy ndarray representing the values in this Series or Index. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 61cd6a47bf..b2846d675c 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -4933,6 +4933,26 @@ def kurt(self): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def item(self: Series, *args, **kwargs): + """Return the first element of the underlying data as a Python scalar. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import numpy as np + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series([1]) + >>> s.item() + np.int64(1) + + Returns: + scalar: The first element of Series. + + Raises: + ValueError: If the data is not length = 1. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def items(self): """ Lazily iterate over (index, value) tuples. diff --git a/third_party/bigframes_vendored/pandas/core/strings/accessor.py b/third_party/bigframes_vendored/pandas/core/strings/accessor.py index 9f3d87ecb7..9b5b461ea5 100644 --- a/third_party/bigframes_vendored/pandas/core/strings/accessor.py +++ b/third_party/bigframes_vendored/pandas/core/strings/accessor.py @@ -252,15 +252,12 @@ def strip(self, to_strip: typing.Optional[str] = None): >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(['1. Ant.', ' 2. Bee? ', '\\t3. Cat!\\n', bpd.NA]) - >>> s - 0 1. Ant. - 1 2. Bee? - 2 3. Cat! - - 3 - dtype: string - + >>> s = bpd.Series([ + ... '1. Ant.', + ... ' 2. Bee? ', + ... '\\t3. Cat!\\n', + ... bpd.NA, + ... ]) >>> s.str.strip() 0 1. Ant. 1 2. Bee? @@ -269,10 +266,10 @@ def strip(self, to_strip: typing.Optional[str] = None): dtype: string >>> s.str.strip('123.!? \\n\\t') - 0 Ant - 1 Bee - 2 Cat - 3 + 0 Ant + 1 Bee + 2 Cat + 3 dtype: string Args: @@ -543,7 +540,7 @@ def isdecimal(self): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def rstrip(self, to_strip: typing.Optional[str] = None): - """Remove trailing characters. + r"""Remove trailing characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the Series/Index from right side. @@ -555,19 +552,11 @@ def rstrip(self, to_strip: typing.Optional[str] = None): >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(['Ant', ' Bee ', '\\tCat\\n', bpd.NA]) - >>> s - 0 Ant - 1 Bee - 2 Cat - - 3 - dtype: string - + >>> s = bpd.Series(['Ant', ' Bee ', '\tCat\n', bpd.NA]) >>> s.str.rstrip() 0 Ant 1 Bee - 2 Cat + 2 \tCat 3 dtype: string @@ -584,7 +573,7 @@ def rstrip(self, to_strip: typing.Optional[str] = None): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def lstrip(self, to_strip: typing.Optional[str] = None): - """Remove leading characters. + r"""Remove leading characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the Series/Index from left side. 
@@ -596,21 +585,12 @@ def lstrip(self, to_strip: typing.Optional[str] = None): >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(['Ant', ' Bee ', '\\tCat\\n', bpd.NA]) - >>> s - 0 Ant - 1 Bee - 2 Cat - - 3 - dtype: string - + >>> s = bpd.Series(['Ant', ' Bee ', '\tCat\n', bpd.NA]) >>> s.str.lstrip() - 0 Ant - 1 Bee - 2 Cat - - 3 + 0 Ant + 1 Bee + 2 Cat\n + 3 dtype: string Args: diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py index aa4d862b65..a0d4092571 100644 --- a/third_party/bigframes_vendored/pandas/io/gbq.py +++ b/third_party/bigframes_vendored/pandas/io/gbq.py @@ -67,6 +67,7 @@ def read_gbq( >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") Read table path with wildcard suffix and filters: + >>> df = bpd.read_gbq_table("bigquery-public-data.noaa_gsod.gsod19*", filters=[("_table_suffix", ">=", "30"), ("_table_suffix", "<=", "39")]) Preserve ordering in a query input. diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py index 6cc3d952ed..e41364d4d1 100644 --- a/third_party/bigframes_vendored/version.py +++ b/third_party/bigframes_vendored/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.5.0" +__version__ = "2.6.0" # {x-release-please-start-date} -__release_date__ = "2025-05-30" +__release_date__ = "2025-06-09" # {x-release-please-end}