diff --git a/tests/system/large/ml/test_cluster.py b/tests/system/large/ml/test_cluster.py
index b65b6e5083..454c94599d 100644
--- a/tests/system/large/ml/test_cluster.py
+++ b/tests/system/large/ml/test_cluster.py
@@ -13,13 +13,11 @@
 # limitations under the License.
 
 import pandas as pd
-import pytest
 
 from bigframes.ml import cluster
-from tests.system.utils import assert_pandas_df_equal
+from tests.system import utils
 
 
-@pytest.mark.flaky(retries=2)
 def test_cluster_configure_fit_score_predict(
     session, penguins_df_default_index, dataset_id
 ):
@@ -88,26 +86,18 @@ def test_cluster_configure_fit_score_predict(
 
     # Check score to ensure the model was fitted
     score_result = model.score(new_penguins).to_pandas()
-    score_expected = pd.DataFrame(
-        {"davies_bouldin_index": [1.502182], "mean_squared_distance": [1.953408]},
-        dtype="Float64",
-    )
-    score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
 
-    pd.testing.assert_frame_equal(
-        score_result, score_expected, check_exact=False, rtol=0.1
-    )
+    eval_metrics = ["davies_bouldin_index", "mean_squared_distance"]
+    utils.check_pandas_df_schema_and_index(score_result, columns=eval_metrics, index=1)
 
     predictions = model.predict(new_penguins).to_pandas()
     assert predictions.shape == (4, 9)
-    result = predictions[["CENTROID_ID"]]
-    expected = pd.DataFrame(
-        {"CENTROID_ID": [2, 3, 1, 2]},
-        dtype="Int64",
-        index=pd.Index(["test1", "test2", "test3", "test4"], dtype="string[pyarrow]"),
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=["CENTROID_ID"],
+        index=["test1", "test2", "test3", "test4"],
+        col_exact=False,
     )
-    expected.index.name = "observation"
-    assert_pandas_df_equal(result, expected, ignore_order=True)
 
     # save, load, check n_clusters to ensure configuration was kept
     reloaded_model = model.to_gbq(
diff --git a/tests/system/large/ml/test_compose.py b/tests/system/large/ml/test_compose.py
index 38c5014c19..45322e78dd 100644
--- a/tests/system/large/ml/test_compose.py
+++ b/tests/system/large/ml/test_compose.py
@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pandas
-
 from bigframes.ml import compose, preprocessing
+from tests.system import utils
 
 
 def test_columntransformer_standalone_fit_and_transform(
@@ -45,26 +44,18 @@ def test_columntransformer_standalone_fit_and_transform(
     )
     result = transformer.transform(new_penguins_df).to_pandas()
 
-    expected = pandas.DataFrame(
-        {
-            "onehotencoded_species": [
-                [{"index": 1, "value": 1.0}],
-                [{"index": 1, "value": 1.0}],
-                [{"index": 2, "value": 1.0}],
-            ],
-            "standard_scaled_culmen_length_mm": [
-                -0.811119671289163,
-                -0.9945520581113803,
-                -1.104611490204711,
-            ],
-            "min_max_scaled_culmen_length_mm": [0.269, 0.232, 0.210],
-            "standard_scaled_flipper_length_mm": [-0.350044, -1.418336, -0.9198],
-        },
-        index=pandas.Index([1633, 1672, 1690], dtype="Int64", name="tag_number"),
+    utils.check_pandas_df_schema_and_index(
+        result,
+        columns=[
+            "onehotencoded_species",
+            "standard_scaled_culmen_length_mm",
+            "min_max_scaled_culmen_length_mm",
+            "standard_scaled_flipper_length_mm",
+        ],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
-    pandas.testing.assert_frame_equal(result, expected, rtol=0.1, check_dtype=False)
-
 
 def test_columntransformer_standalone_fit_transform(new_penguins_df):
     transformer = compose.ColumnTransformer(
@@ -86,25 +77,17 @@ def test_columntransformer_standalone_fit_transform(new_penguins_df):
         new_penguins_df[["species", "culmen_length_mm", "flipper_length_mm"]]
     ).to_pandas()
 
-    expected = pandas.DataFrame(
-        {
-            "onehotencoded_species": [
-                [{"index": 1, "value": 1.0}],
-                [{"index": 1, "value": 1.0}],
-                [{"index": 2, "value": 1.0}],
-            ],
-            "standard_scaled_culmen_length_mm": [
-                1.313249,
-                -0.20198,
-                -1.111118,
-            ],
-            "standard_scaled_flipper_length_mm": [1.251098, -1.196588, -0.054338],
-        },
-        index=pandas.Index([1633, 1672, 1690], dtype="Int64", name="tag_number"),
+    utils.check_pandas_df_schema_and_index(
+        result,
+        columns=[
+            "onehotencoded_species",
+            "standard_scaled_culmen_length_mm",
+            "standard_scaled_flipper_length_mm",
+        ],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
-    pandas.testing.assert_frame_equal(result, expected, rtol=0.1, check_dtype=False)
-
 
 def test_columntransformer_save_load(new_penguins_df, dataset_id):
     transformer = compose.ColumnTransformer(
@@ -147,23 +130,13 @@ def test_columntransformer_save_load(new_penguins_df, dataset_id):
         new_penguins_df[["species", "culmen_length_mm", "flipper_length_mm"]]
     ).to_pandas()
 
-    # TODO(b/340888429): fix type error
-    expected = pandas.DataFrame(  # type: ignore
-        {
-            "onehotencoded_species": [
-                [{"index": 1, "value": 1.0}],
-                [{"index": 1, "value": 1.0}],
-                [{"index": 2, "value": 1.0}],
-            ],
-            "standard_scaled_culmen_length_mm": [
-                1.313249,
-                -0.20198,
-                -1.111118,
-            ],
-            "standard_scaled_flipper_length_mm": [1.251098, -1.196588, -0.054338],
-        },
-        index=pandas.Index([1633, 1672, 1690], dtype="Int64", name="tag_number"),
+    utils.check_pandas_df_schema_and_index(
+        result,
+        columns=[
+            "onehotencoded_species",
+            "standard_scaled_culmen_length_mm",
+            "standard_scaled_flipper_length_mm",
+        ],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
-
-    # TODO(b/340888429): fix type error
-    pandas.testing.assert_frame_equal(result, expected, rtol=0.1, check_dtype=False)  # type: ignore
diff --git a/tests/system/large/ml/test_core.py b/tests/system/large/ml/test_core.py
index aec1065e41..be5eea925f 100644
--- a/tests/system/large/ml/test_core.py
+++ b/tests/system/large/ml/test_core.py
@@ -12,14 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pandas
-import pytest
-
 from bigframes.ml import globals
+from tests.system import utils
 
 
-# TODO(garrettwu): Re-enable or not check exact numbers.
-@pytest.mark.skip(reason="bqml regression")
 def test_bqml_e2e(session, dataset_id, penguins_df_default_index, new_penguins_df):
     df = penguins_df_default_index.dropna()
     X_train = df[
@@ -38,41 +34,33 @@ def test_bqml_e2e(session, dataset_id, penguins_df_default_index, new_penguins_d
         X_train, y_train, options={"model_type": "linear_reg"}
     )
 
+    eval_metrics = [
+        "mean_absolute_error",
+        "mean_squared_error",
+        "mean_squared_log_error",
+        "median_absolute_error",
+        "r2_score",
+        "explained_variance",
+    ]
     # no data - report evaluation from the automatic data split
     evaluate_result = model.evaluate().to_pandas()
-    evaluate_expected = pandas.DataFrame(
-        {
-            "mean_absolute_error": [225.817334],
-            "mean_squared_error": [80540.705944],
-            "mean_squared_log_error": [0.004972],
-            "median_absolute_error": [173.080816],
-            "r2_score": [0.87529],
-            "explained_variance": [0.87529],
-        },
-        dtype="Float64",
-    )
-    evaluate_expected = evaluate_expected.reindex(
-        index=evaluate_expected.index.astype("Int64")
-    )
-    pandas.testing.assert_frame_equal(
-        evaluate_result, evaluate_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        evaluate_result, columns=eval_metrics, index=1
     )
 
     # evaluate on all training data
     evaluate_result = model.evaluate(df).to_pandas()
-    pandas.testing.assert_frame_equal(
-        evaluate_result, evaluate_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        evaluate_result, columns=eval_metrics, index=1
     )
 
     # predict new labels
     predictions = model.predict(new_penguins_df).to_pandas()
-    expected = pandas.DataFrame(
-        {"predicted_body_mass_g": [4030.1, 3280.8, 3177.9]},
-        dtype="Float64",
-        index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-    pandas.testing.assert_frame_equal(
-        predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=["predicted_body_mass_g"],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
     new_name = f"{dataset_id}.my_model"
@@ -108,42 +96,34 @@ def test_bqml_manual_preprocessing_e2e(
         X_train, y_train, transforms=transforms, options=options
     )
 
+    eval_metrics = [
+        "mean_absolute_error",
+        "mean_squared_error",
+        "mean_squared_log_error",
+        "median_absolute_error",
+        "r2_score",
+        "explained_variance",
+    ]
+
     # no data - report evaluation from the automatic data split
     evaluate_result = model.evaluate().to_pandas()
-    evaluate_expected = pandas.DataFrame(
-        {
-            "mean_absolute_error": [309.477334],
-            "mean_squared_error": [152184.227218],
-            "mean_squared_log_error": [0.009524],
-            "median_absolute_error": [257.727777],
-            "r2_score": [0.764356],
-            "explained_variance": [0.764356],
-        },
-        dtype="Float64",
-    )
-    evaluate_expected = evaluate_expected.reindex(
-        index=evaluate_expected.index.astype("Int64")
-    )
-
-    pandas.testing.assert_frame_equal(
-        evaluate_result, evaluate_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        evaluate_result, columns=eval_metrics, index=1
     )
 
     # evaluate on all training data
     evaluate_result = model.evaluate(df).to_pandas()
-    pandas.testing.assert_frame_equal(
-        evaluate_result, evaluate_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        evaluate_result, columns=eval_metrics, index=1
     )
 
     # predict new labels
     predictions = model.predict(new_penguins_df).to_pandas()
-    expected = pandas.DataFrame(
-        {"predicted_body_mass_g": [3968.8, 3176.3, 3545.2]},
-        dtype="Float64",
-        index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-    pandas.testing.assert_frame_equal(
-        predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=["predicted_body_mass_g"],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
     new_name = f"{dataset_id}.my_model"
@@ -168,24 +148,9 @@ def test_bqml_standalone_transform(penguins_df_default_index, new_penguins_df):
     )
 
     transformed = model.transform(new_penguins_df).to_pandas()
-    expected = pandas.DataFrame(
-        {
-            "scaled_culmen_length_mm": [-0.8099, -0.9931, -1.103],
-            "onehotencoded_species": [
-                [{"index": 1, "value": 1.0}],
-                [{"index": 1, "value": 1.0}],
-                [{"index": 2, "value": 1.0}],
-            ],
-        },
-        index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-    expected["scaled_culmen_length_mm"] = expected["scaled_culmen_length_mm"].astype(
-        "Float64"
-    )
-    pandas.testing.assert_frame_equal(
-        transformed[["scaled_culmen_length_mm", "onehotencoded_species"]],
-        expected,
-        check_exact=False,
-        rtol=0.1,
-        check_dtype=False,
+    utils.check_pandas_df_schema_and_index(
+        transformed,
+        columns=["scaled_culmen_length_mm", "onehotencoded_species"],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
diff --git a/tests/system/large/ml/test_decomposition.py b/tests/system/large/ml/test_decomposition.py
index 9dab85023a..4312c24125 100644
--- a/tests/system/large/ml/test_decomposition.py
+++ b/tests/system/large/ml/test_decomposition.py
@@ -15,7 +15,7 @@
 import pandas as pd
 
 from bigframes.ml import decomposition
-import tests.system.utils
+from tests.system import utils
 
 
 def test_decomposition_configure_fit_score_predict(
@@ -45,34 +45,19 @@ def test_decomposition_configure_fit_score_predict(
 
     # Check score to ensure the model was fitted
     score_result = model.score(new_penguins).to_pandas()
-    score_expected = pd.DataFrame(
-        {
-            "total_explained_variance_ratio": [0.812383],
-        },
-        dtype="Float64",
-    )
-    score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    pd.testing.assert_frame_equal(
-        score_result, score_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        score_result, columns=["total_explained_variance_ratio"], index=1
     )
 
     result = model.predict(new_penguins).to_pandas()
-    expected = pd.DataFrame(
-        {
-            "principal_component_1": [-1.459, 2.258, -1.685],
-            "principal_component_2": [-1.120, -1.351, -0.874],
-            "principal_component_3": [-0.646, 0.443, -0.704],
-        },
-        dtype="Float64",
-        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-
-    tests.system.utils.assert_pandas_df_equal_pca(
+    utils.check_pandas_df_schema_and_index(
         result,
-        expected,
-        check_exact=False,
-        rtol=0.1,
+        columns=[
+            "principal_component_1",
+            "principal_component_2",
+            "principal_component_3",
+        ],
+        index=[1633, 1672, 1690],
     )
 
     # save, load, check n_components to ensure configuration was kept
@@ -114,36 +99,21 @@ def test_decomposition_configure_fit_score_predict_params(
 
     # Check score to ensure the model was fitted
     score_result = model.score(new_penguins).to_pandas()
-    score_expected = pd.DataFrame(
-        {
-            "total_explained_variance_ratio": [0.932897],
-        },
-        dtype="Float64",
-    )
-    score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    pd.testing.assert_frame_equal(
-        score_result, score_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        score_result, columns=["total_explained_variance_ratio"], index=1
     )
 
     result = model.predict(new_penguins).to_pandas()
-    expected = pd.DataFrame(
-        {
-            "principal_component_1": [-1.459, 2.258, -1.685],
-            "principal_component_2": [-1.120, -1.351, -0.874],
-            "principal_component_3": [-0.646, 0.443, -0.704],
-            "principal_component_4": [-0.539, 0.234, -0.571],
-            "principal_component_5": [-0.876, 0.122, 0.609],
-        },
-        dtype="Float64",
-        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-
-    tests.system.utils.assert_pandas_df_equal_pca(
+    utils.check_pandas_df_schema_and_index(
         result,
-        expected,
-        check_exact=False,
-        rtol=0.1,
+        columns=[
+            "principal_component_1",
+            "principal_component_2",
+            "principal_component_3",
+            "principal_component_4",
+            "principal_component_5",
+        ],
+        index=[1633, 1672, 1690],
     )
 
     # save, load, check n_components to ensure configuration was kept
diff --git a/tests/system/large/ml/test_ensemble.py b/tests/system/large/ml/test_ensemble.py
index b9aae21956..b71d023302 100644
--- a/tests/system/large/ml/test_ensemble.py
+++ b/tests/system/large/ml/test_ensemble.py
@@ -12,16 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from unittest import TestCase
-
-import pandas
 import pytest
 
 import bigframes.ml.ensemble
+from tests.system import utils
 
 
-# TODO(garrettwu): Re-enable or not check exact numbers.
-@pytest.mark.skip(reason="bqml regression")
 @pytest.mark.flaky(retries=2)
 def test_xgbregressor_default_params(penguins_df_default_index, dataset_id):
     model = bigframes.ml.ensemble.XGBRegressor()
@@ -42,19 +38,9 @@ def test_xgbregressor_default_params(penguins_df_default_index, dataset_id):
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    expected = pandas.DataFrame(
-        {
-            "mean_absolute_error": [97.368139],
-            "mean_squared_error": [16284.877027],
-            "mean_squared_log_error": [0.0010189],
-            "median_absolute_error": [72.158691],
-            "r2_score": [0.974784],
-            "explained_variance": [0.974845],
-        },
-        dtype="Float64",
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
-    expected = expected.reindex(index=expected.index.astype("Int64"))
-    pandas.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1)
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(
@@ -104,16 +90,9 @@ def test_xgbregressor_dart_booster_multiple_params(
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    TestCase().assertSequenceEqual(result.shape, (1, 6))
-    for col_name in [
-        "mean_absolute_error",
-        "mean_squared_error",
-        "mean_squared_log_error",
-        "median_absolute_error",
-        "r2_score",
-        "explained_variance",
-    ]:
-        assert col_name in result.columns
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_REGRESSION_METRICS, index=1
+    )
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(
@@ -161,16 +140,9 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id):
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    TestCase().assertSequenceEqual(result.shape, (1, 6))
-    for col_name in [
-        "precision",
-        "recall",
-        "accuracy",
-        "f1_score",
-        "log_loss",
-        "roc_auc",
-    ]:
-        assert col_name in result.columns
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_CLASSFICATION_METRICS, index=1
+    )
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(
@@ -183,7 +155,7 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id):
     )
 
 
-@pytest.mark.flaky(retries=2)
+# TODO: retries disabled; restore @pytest.mark.flaky(retries=2) or delete.
 def test_xgbclassifier_dart_booster_multiple_params(
     penguins_df_default_index, dataset_id
 ):
@@ -219,16 +191,9 @@ def test_xgbclassifier_dart_booster_multiple_params(
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    TestCase().assertSequenceEqual(result.shape, (1, 6))
-    for col_name in [
-        "precision",
-        "recall",
-        "accuracy",
-        "f1_score",
-        "log_loss",
-        "roc_auc",
-    ]:
-        assert col_name in result.columns
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_CLASSFICATION_METRICS, index=1
+    )
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(
@@ -277,16 +242,9 @@ def test_randomforestregressor_default_params(penguins_df_default_index, dataset
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    TestCase().assertSequenceEqual(result.shape, (1, 6))
-    for col_name in [
-        "mean_absolute_error",
-        "mean_squared_error",
-        "mean_squared_log_error",
-        "median_absolute_error",
-        "r2_score",
-        "explained_variance",
-    ]:
-        assert col_name in result.columns
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_REGRESSION_METRICS, index=1
+    )
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(
@@ -331,16 +289,9 @@ def test_randomforestregressor_multiple_params(penguins_df_default_index, datase
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    TestCase().assertSequenceEqual(result.shape, (1, 6))
-    for col_name in [
-        "mean_absolute_error",
-        "mean_squared_error",
-        "mean_squared_log_error",
-        "median_absolute_error",
-        "r2_score",
-        "explained_variance",
-    ]:
-        assert col_name in result.columns
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_REGRESSION_METRICS, index=1
+    )
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(
@@ -385,16 +336,9 @@ def test_randomforestclassifier_default_params(penguins_df_default_index, datase
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    TestCase().assertSequenceEqual(result.shape, (1, 6))
-    for col_name in [
-        "precision",
-        "recall",
-        "accuracy",
-        "f1_score",
-        "log_loss",
-        "roc_auc",
-    ]:
-        assert col_name in result.columns
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_CLASSFICATION_METRICS, index=1
+    )
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(
@@ -439,16 +383,9 @@ def test_randomforestclassifier_multiple_params(penguins_df_default_index, datas
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    TestCase().assertSequenceEqual(result.shape, (1, 6))
-    for col_name in [
-        "precision",
-        "recall",
-        "accuracy",
-        "f1_score",
-        "log_loss",
-        "roc_auc",
-    ]:
-        assert col_name in result.columns
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_CLASSFICATION_METRICS, index=1
+    )
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(
diff --git a/tests/system/large/ml/test_forecasting.py b/tests/system/large/ml/test_forecasting.py
index 1c0924245e..be7cf8c93d 100644
--- a/tests/system/large/ml/test_forecasting.py
+++ b/tests/system/large/ml/test_forecasting.py
@@ -12,15 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pandas as pd
 import pytest
 
 from bigframes.ml import forecasting
+from tests.system import utils
 
 ARIMA_EVALUATE_OUTPUT_COL = [
     "non_seasonal_p",
     "non_seasonal_d",
     "non_seasonal_q",
+    "has_drift",
     "log_likelihood",
     "AIC",
     "variance",
@@ -50,18 +51,17 @@ def test_arima_plus_model_fit_score(
     result = arima_model.score(
         new_time_series_df[["parsed_date"]], new_time_series_df[["total_visits"]]
     ).to_pandas()
-    expected = pd.DataFrame(
-        {
-            "mean_absolute_error": [154.742547],
-            "mean_squared_error": [26844.868855],
-            "root_mean_squared_error": [163.844038],
-            "mean_absolute_percentage_error": [6.189702],
-            "symmetric_mean_absolute_percentage_error": [6.097155],
-        },
-        dtype="Float64",
+    utils.check_pandas_df_schema_and_index(
+        result,
+        columns=[
+            "mean_absolute_error",
+            "mean_squared_error",
+            "root_mean_squared_error",
+            "mean_absolute_percentage_error",
+            "symmetric_mean_absolute_percentage_error",
+        ],
+        index=1,
     )
-    expected = expected.reindex(index=expected.index.astype("Int64"))
-    pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1)
 
     # save, load to ensure configuration was kept
     reloaded_model = arima_model.to_gbq(
@@ -73,10 +73,10 @@ def test_arima_plus_model_fit_score(
 
 
 def test_arima_plus_model_fit_summary(dataset_id, arima_model):
-
-    result = arima_model.summary()
-    assert result.shape == (1, 12)
-    assert all(column in result.columns for column in ARIMA_EVALUATE_OUTPUT_COL)
+    result = arima_model.summary().to_pandas()
+    utils.check_pandas_df_schema_and_index(
+        result, columns=ARIMA_EVALUATE_OUTPUT_COL, index=1
+    )
 
     # save, load to ensure configuration was kept
     reloaded_model = arima_model.to_gbq(
@@ -88,13 +88,13 @@ def test_arima_plus_model_fit_summary(dataset_id, arima_model):
 
 
 def test_arima_coefficients(arima_model):
-    got = arima_model.coef_
-    expected_columns = {
+    result = arima_model.coef_.to_pandas()
+    expected_columns = [
         "ar_coefficients",
         "ma_coefficients",
         "intercept_or_drift",
-    }
-    assert set(got.columns) == expected_columns
+    ]
+    utils.check_pandas_df_schema_and_index(result, columns=expected_columns, index=1)
 
 
 def test_arima_plus_model_fit_params(time_series_df_default_index, dataset_id):
diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py
index 50e3de3fc4..f1f7985278 100644
--- a/tests/system/large/ml/test_linear_model.py
+++ b/tests/system/large/ml/test_linear_model.py
@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pandas as pd
-
 import bigframes.ml.linear_model
+from tests.system import utils
 
 
 def test_linear_regression_configure_fit_score(penguins_df_default_index, dataset_id):
@@ -36,19 +35,9 @@ def test_linear_regression_configure_fit_score(penguins_df_default_index, datase
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    expected = pd.DataFrame(
-        {
-            "mean_absolute_error": [225.735767],
-            "mean_squared_error": [80417.461828],
-            "mean_squared_log_error": [0.004967],
-            "median_absolute_error": [172.543702],
-            "r2_score": [0.87548],
-            "explained_variance": [0.87548],
-        },
-        dtype="Float64",
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
-    expected = expected.reindex(index=expected.index.astype("Int64"))
-    pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1)
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(f"{dataset_id}.temp_configured_model", replace=True)
@@ -99,19 +88,9 @@ def test_linear_regression_customized_params_fit_score(
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    expected = pd.DataFrame(
-        {
-            "mean_absolute_error": [240],
-            "mean_squared_error": [91197],
-            "mean_squared_log_error": [0.00573],
-            "median_absolute_error": [197],
-            "r2_score": [0.858],
-            "explained_variance": [0.8588],
-        },
-        dtype="Float64",
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
-    expected = expected.reindex(index=expected.index.astype("Int64"))
-    pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1)
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(f"{dataset_id}.temp_configured_model", replace=True)
@@ -154,19 +133,9 @@ def test_logistic_regression_configure_fit_score(penguins_df_default_index, data
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    expected = pd.DataFrame(
-        {
-            "precision": [0.616753],
-            "recall": [0.618615],
-            "accuracy": [0.92515],
-            "f1_score": [0.617681],
-            "log_loss": [1.498832],
-            "roc_auc": [0.975807],
-        },
-        dtype="Float64",
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_CLASSFICATION_METRICS, index=1
     )
-    expected = expected.reindex(index=expected.index.astype("Int64"))
-    pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1)
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(
@@ -210,19 +179,9 @@ def test_logistic_regression_customized_params_fit_score(
 
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    expected = pd.DataFrame(
-        {
-            "precision": [0.487],
-            "recall": [0.602],
-            "accuracy": [0.464],
-            "f1_score": [0.379],
-            "log_loss": [0.972],
-            "roc_auc": [0.700],
-        },
-        dtype="Float64",
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_CLASSFICATION_METRICS, index=1
     )
-    expected = expected.reindex(index=expected.index.astype("Int64"))
-    pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1)
 
     # save, load, check parameters to ensure configuration was kept
     reloaded_model = model.to_gbq(
diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py
index 1a92d0f7d4..3d7eb2e426 100644
--- a/tests/system/large/ml/test_pipeline.py
+++ b/tests/system/large/ml/test_pipeline.py
@@ -24,7 +24,7 @@
     pipeline,
     preprocessing,
 )
-from tests.system.utils import assert_pandas_df_equal, assert_pandas_df_equal_pca
+from tests.system import utils
 
 
 def test_pipeline_linear_regression_fit_score_predict(
@@ -51,21 +51,8 @@ def test_pipeline_linear_regression_fit_score_predict(
 
     # Check score to ensure the model was fitted
     score_result = pl.score(X_train, y_train).to_pandas()
-    score_expected = pd.DataFrame(
-        {
-            "mean_absolute_error": [309.477331],
-            "mean_squared_error": [152184.227219],
-            "mean_squared_log_error": [0.009524],
-            "median_absolute_error": [257.728263],
-            "r2_score": [0.764356],
-            "explained_variance": [0.764356],
-        },
-        dtype="Float64",
-    )
-    score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    pd.testing.assert_frame_equal(
-        score_result, score_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        score_result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
 
     # predict new labels
@@ -87,13 +74,11 @@ def test_pipeline_linear_regression_fit_score_predict(
         ).set_index("tag_number")
     )
     predictions = pl.predict(new_penguins).to_pandas()
-    expected = pd.DataFrame(
-        {"predicted_body_mass_g": [3968.8, 3176.3, 3545.2]},
-        dtype="Float64",
-        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-    pd.testing.assert_frame_equal(
-        predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=["predicted_body_mass_g"],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
 
@@ -115,21 +100,8 @@ def test_pipeline_linear_regression_series_fit_score_predict(
 
     # Check score to ensure the model was fitted
     score_result = pl.score(X_train, y_train).to_pandas()
-    score_expected = pd.DataFrame(
-        {
-            "mean_absolute_error": [528.495599],
-            "mean_squared_error": [421722.261808],
-            "mean_squared_log_error": [0.022963],
-            "median_absolute_error": [468.895249],
-            "r2_score": [0.346999],
-            "explained_variance": [0.346999],
-        },
-        dtype="Float64",
-    )
-    score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    pd.testing.assert_frame_equal(
-        score_result, score_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        score_result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
 
     # predict new labels
@@ -142,13 +114,11 @@ def test_pipeline_linear_regression_series_fit_score_predict(
         ).set_index("tag_number")
     )
     predictions = pl.predict(new_penguins["culmen_length_mm"]).to_pandas()
-    expected = pd.DataFrame(
-        {"predicted_body_mass_g": [3818.845703, 3732.022253, 3679.928123]},
-        dtype="Float64",
-        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-    pd.testing.assert_frame_equal(
-        predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=["predicted_body_mass_g"],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
 
@@ -176,21 +146,8 @@ def test_pipeline_logistic_regression_fit_score_predict(
 
     # Check score to ensure the model was fitted
     score_result = pl.score(X_train, y_train).to_pandas()
-    score_expected = pd.DataFrame(
-        {
-            "precision": [0.537091],
-            "recall": [0.538636],
-            "accuracy": [0.805389],
-            "f1_score": [0.537716],
-            "log_loss": [1.445433],
-            "roc_auc": [0.917818],
-        },
-        dtype="Float64",
-    )
-    score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    pd.testing.assert_frame_equal(
-        score_result, score_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        score_result, columns=utils.ML_CLASSFICATION_METRICS, index=1
     )
 
     # predict new labels
@@ -211,19 +168,14 @@ def test_pipeline_logistic_regression_fit_score_predict(
         ).set_index("tag_number")
     )
     predictions = pl.predict(new_penguins).to_pandas()
-    expected = pd.DataFrame(
-        {"predicted_sex": ["MALE", "FEMALE", "FEMALE"]},
-        dtype=pd.StringDtype(storage="pyarrow"),
-        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-    pd.testing.assert_frame_equal(
-        predictions[["predicted_sex"]],
-        expected,
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=["predicted_sex"],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
 
-# TODO(garrettwu): Re-enable or not check exact numbers.
-@pytest.mark.skip(reason="bqml regression")
 @pytest.mark.flaky(retries=2)
 def test_pipeline_xgbregressor_fit_score_predict(session, penguins_df_default_index):
     """Test a supervised model with a minimal preprocessing step"""
@@ -247,21 +199,8 @@ def test_pipeline_xgbregressor_fit_score_predict(session, penguins_df_default_in
 
     # Check score to ensure the model was fitted
     score_result = pl.score(X_train, y_train).to_pandas()
-    score_expected = pd.DataFrame(
-        {
-            "mean_absolute_error": [202.298434],
-            "mean_squared_error": [74515.108971],
-            "mean_squared_log_error": [0.004365],
-            "median_absolute_error": [142.949219],
-            "r2_score": [0.88462],
-            "explained_variance": [0.886454],
-        },
-        dtype="Float64",
-    )
-    score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    pd.testing.assert_frame_equal(
-        score_result, score_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        score_result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
 
     # predict new labels
@@ -283,24 +222,14 @@ def test_pipeline_xgbregressor_fit_score_predict(session, penguins_df_default_in
         ).set_index("tag_number")
     )
     predictions = pl.predict(new_penguins).to_pandas()
-    expected = pd.DataFrame(
-        {
-            "predicted_body_mass_g": [
-                4287.34521484375,
-                3198.351806640625,
-                3385.34130859375,
-            ]
-        },
-        dtype="Float64",
-        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-    pd.testing.assert_frame_equal(
-        predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=["predicted_body_mass_g"],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
 
-# TODO(garrettwu): Re-enable or not check exact numbers.
-@pytest.mark.skip(reason="bqml regression")
 @pytest.mark.flaky(retries=2)
 def test_pipeline_random_forest_classifier_fit_score_predict(
     session, penguins_df_default_index
@@ -326,21 +255,8 @@ def test_pipeline_random_forest_classifier_fit_score_predict(
 
     # Check score to ensure the model was fitted
     score_result = pl.score(X_train, y_train).to_pandas()
-    score_expected = pd.DataFrame(
-        {
-            "precision": [0.585505],
-            "recall": [0.58676],
-            "accuracy": [0.877246],
-            "f1_score": [0.585657],
-            "log_loss": [0.880643],
-            "roc_auc": [0.970697],
-        },
-        dtype="Float64",
-    )
-    score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    pd.testing.assert_frame_equal(
-        score_result, score_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        score_result, columns=utils.ML_CLASSFICATION_METRICS, index=1
     )
 
     # predict new labels
@@ -361,14 +277,11 @@ def test_pipeline_random_forest_classifier_fit_score_predict(
         ).set_index("tag_number")
     )
     predictions = pl.predict(new_penguins).to_pandas()
-    expected = pd.DataFrame(
-        {"predicted_sex": ["MALE", "FEMALE", "FEMALE"]},
-        dtype=pd.StringDtype(storage="pyarrow"),
-        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-    pd.testing.assert_frame_equal(
-        predictions[["predicted_sex"]],
-        expected,
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=["predicted_sex"],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
 
@@ -412,40 +325,20 @@ def test_pipeline_PCA_fit_score_predict(session, penguins_df_default_index):
 
     # Check score to ensure the model was fitted
     score_result = pl.score(new_penguins).to_pandas()
-    score_expected = pd.DataFrame(
-        {
-            "total_explained_variance_ratio": [1.0],
-        },
-        dtype="Float64",
-    )
-    score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    pd.testing.assert_frame_equal(
-        score_result, score_expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        score_result, columns=["total_explained_variance_ratio"], index=1
     )
 
     predictions = pl.predict(new_penguins).to_pandas()
-    expected = pd.DataFrame(
-        {
-            "principal_component_1": [-1.115259, -1.506141, -1.471173],
-            "principal_component_2": [-0.074825, 0.69664, 0.406103],
-            "principal_component_3": [0.500013, -0.544479, 0.075849],
-        },
-        dtype="Float64",
-        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-
-    assert_pandas_df_equal_pca(
-        predictions[
-            [
-                "principal_component_1",
-                "principal_component_2",
-                "principal_component_3",
-            ]
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=[
+            "principal_component_1",
+            "principal_component_2",
+            "principal_component_3",
         ],
-        expected,
-        check_exact=False,
-        rtol=0.1,
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
 
@@ -538,29 +431,16 @@ def test_pipeline_standard_scaler_kmeans_fit_score_predict(
 
     # Check score to ensure the model was fitted
     score_result = pl.score(new_penguins).to_pandas()
-    score_expected = pd.DataFrame(
-        {"davies_bouldin_index": [7.542981], "mean_squared_distance": [94.692409]},
-        dtype="Float64",
-    )
-    score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    pd.testing.assert_frame_equal(
-        score_result, score_expected, check_exact=False, rtol=0.1
-    )
+    eval_metrics = ["davies_bouldin_index", "mean_squared_distance"]
+    utils.check_pandas_df_schema_and_index(score_result, columns=eval_metrics, index=1)
 
     predictions = pl.predict(new_penguins).to_pandas().sort_index()
-    assert predictions.shape == (6, 9)
-    result = predictions[["CENTROID_ID"]]
-    expected = pd.DataFrame(
-        {"CENTROID_ID": [1, 2, 1, 2, 1, 2]},
-        dtype="Int64",
-        index=pd.Index(
-            ["test1", "test2", "test3", "test4", "test5", "test6"],
-            dtype="string[pyarrow]",
-        ),
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=["CENTROID_ID"],
+        index=["test1", "test2", "test3", "test4", "test5", "test6"],
+        col_exact=False,
     )
-    expected.index.name = "observation"
-    assert_pandas_df_equal(result, expected, ignore_order=True)
 
 
 def test_pipeline_columntransformer_fit_predict(session, penguins_df_default_index):
@@ -632,13 +512,11 @@ def test_pipeline_columntransformer_fit_predict(session, penguins_df_default_ind
         ).set_index("tag_number")
     )
     predictions = pl.predict(new_penguins).to_pandas()
-    expected = pd.DataFrame(
-        {"predicted_body_mass_g": [3909.2, 3436.0, 2860.0]},
-        dtype="Float64",
-        index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    )
-    pd.testing.assert_frame_equal(
-        predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1
+    utils.check_pandas_df_schema_and_index(
+        predictions,
+        columns=["predicted_body_mass_g"],
+        index=[1633, 1672, 1690],
+        col_exact=False,
     )
 
 
diff --git a/tests/system/utils.py b/tests/system/utils.py
index e40502e6f2..ab4c2c119f 100644
--- a/tests/system/utils.py
+++ b/tests/system/utils.py
@@ -15,7 +15,7 @@
 import base64
 import decimal
 import functools
-from typing import Iterable, Optional, Set
+from typing import Iterable, Optional, Set, Union
 
 import geopandas as gpd  # type: ignore
 import google.api_core.operation
@@ -28,6 +28,23 @@
 
 from bigframes.functions import remote_function
 
+ML_REGRESSION_METRICS = [
+    "mean_absolute_error",
+    "mean_squared_error",
+    "mean_squared_log_error",
+    "median_absolute_error",
+    "r2_score",
+    "explained_variance",
+]
+ML_CLASSFICATION_METRICS = [
+    "precision",
+    "recall",
+    "accuracy",
+    "f1_score",
+    "log_loss",
+    "roc_auc",
+]
+
 
 def skip_legacy_pandas(test):
     @functools.wraps(test)
@@ -249,6 +266,33 @@ def assert_pandas_df_equal_pca(actual, expected, **kwargs):
             pd.testing.assert_series_equal(-actual[column], expected[column], **kwargs)
 
 
+def check_pandas_df_schema_and_index(
+    pd_df: pd.DataFrame,
+    columns: Iterable,
+    index: Union[int, Iterable],
+    col_exact: bool = True,
+):
+    """Check the schema (columns) and index of a pandas df, but not the values.
+
+    Args:
+        pd_df: The pandas df to check.
+        columns: The expected column labels.
+        index: int or Iterable. If an int, only check that the length (number of rows) of the df equals it. If an Iterable, check that the index values match it exactly, in order.
+        col_exact: If True, check that the df's columns exactly match the columns param, in order. Otherwise, only check that the df contains all of those columns.
+    """
+    if col_exact:
+        assert list(pd_df.columns) == list(columns)
+    else:
+        assert set(columns) <= set(pd_df.columns)
+
+    if isinstance(index, int):
+        assert len(pd_df) == index
+    elif isinstance(index, Iterable):
+        assert list(pd_df.index) == list(index)
+    else:
+        raise ValueError("Unsupported index type.")
+
+
 def get_remote_function_endpoints(
     bigquery_client: bigquery.Client, dataset_id: str
 ) -> Set[str]: