From fc0a642412e05356ded562926c3899fe13643645 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 15 May 2024 01:33:30 +0000 Subject: [PATCH 1/2] test: stop checking ml large tests exact numbers --- tests/system/large/ml/test_cluster.py | 26 +- tests/system/large/ml/test_compose.py | 83 ++---- tests/system/large/ml/test_core.py | 117 +++----- tests/system/large/ml/test_decomposition.py | 72 ++--- tests/system/large/ml/test_ensemble.py | 173 +++++++---- tests/system/large/ml/test_forecasting.py | 51 ++-- tests/system/large/ml/test_linear_model.py | 107 +++---- tests/system/large/ml/test_pipeline.py | 308 ++++++++------------ tests/system/utils.py | 39 ++- 9 files changed, 473 insertions(+), 503 deletions(-) diff --git a/tests/system/large/ml/test_cluster.py b/tests/system/large/ml/test_cluster.py index b633ca4ea2..fd1d30c711 100644 --- a/tests/system/large/ml/test_cluster.py +++ b/tests/system/large/ml/test_cluster.py @@ -13,13 +13,11 @@ # limitations under the License. import pandas as pd -import pytest from bigframes.ml import cluster -from tests.system.utils import assert_pandas_df_equal +from tests.system import utils -@pytest.mark.flaky(retries=2) def test_cluster_configure_fit_score_predict( session, penguins_df_default_index, dataset_id ): @@ -88,26 +86,18 @@ def test_cluster_configure_fit_score_predict( # Check score to ensure the model was fitted score_result = model.score(new_penguins).to_pandas() - score_expected = pd.DataFrame( - {"davies_bouldin_index": [1.502182], "mean_squared_distance": [1.953408]}, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 - ) + eval_metrics = ["davies_bouldin_index", "mean_squared_distance"] + utils.check_pandas_df_schema_and_index(score_result, columns=eval_metrics, index=1) predictions = model.predict(new_penguins).to_pandas() assert predictions.shape == (4, 9) - result = predictions[["CENTROID_ID"]] - expected = pd.DataFrame( - {"CENTROID_ID": [2, 3, 1, 2]}, - dtype="Int64", - index=pd.Index(["test1", "test2", "test3", "test4"], dtype="string[pyarrow]"), + utils.check_pandas_df_schema_and_index( + predictions, + columns=["CENTROID_ID"], + index=["test1", "test2", "test3", "test4"], + col_exact=False, ) - expected.index.name = "observation" - assert_pandas_df_equal(result, expected, ignore_order=True) # save, load, check n_clusters to ensure configuration was kept reloaded_model = model.to_gbq( diff --git a/tests/system/large/ml/test_compose.py b/tests/system/large/ml/test_compose.py index 7513b78b29..45322e78dd 100644 --- a/tests/system/large/ml/test_compose.py +++ b/tests/system/large/ml/test_compose.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
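The test_cluster.py hunk above sets the pattern for the whole series: hard-coded expected values from BQML output are dropped in favor of schema-and-shape assertions. A minimal sketch of what a `utils.check_pandas_df_schema_and_index(score_result, columns=eval_metrics, index=1)` call verifies, assuming an illustrative score() frame (the helper itself is added to tests/system/utils.py at the end of this patch):

import pandas as pd

# Illustrative score() output; the concrete values drift across BQML
# releases, which is exactly why the patch stops pinning them.
score_result = pd.DataFrame(
    {"davies_bouldin_index": [1.502182], "mean_squared_distance": [1.953408]},
    dtype="Float64",
)

# Exact column-list match (the helper's default, col_exact=True) ...
assert list(score_result.columns) == ["davies_bouldin_index", "mean_squared_distance"]
# ... and index=1 means "exactly one evaluation row", values unchecked.
assert len(score_result) == 1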
-import pandas - from bigframes.ml import compose, preprocessing +from tests.system import utils def test_columntransformer_standalone_fit_and_transform( @@ -45,26 +44,18 @@ def test_columntransformer_standalone_fit_and_transform( ) result = transformer.transform(new_penguins_df).to_pandas() - expected = pandas.DataFrame( - { - "onehotencoded_species": [ - [{"index": 1, "value": 1.0}], - [{"index": 1, "value": 1.0}], - [{"index": 2, "value": 1.0}], - ], - "standard_scaled_culmen_length_mm": [ - -0.811119671289163, - -0.9945520581113803, - -1.104611490204711, - ], - "min_max_scaled_culmen_length_mm": [0.269, 0.232, 0.210], - "standard_scaled_flipper_length_mm": [-0.350044, -1.418336, -0.9198], - }, - index=pandas.Index([1633, 1672, 1690], dtype="Int64", name="tag_number"), + utils.check_pandas_df_schema_and_index( + result, + columns=[ + "onehotencoded_species", + "standard_scaled_culmen_length_mm", + "min_max_scaled_culmen_length_mm", + "standard_scaled_flipper_length_mm", + ], + index=[1633, 1672, 1690], + col_exact=False, ) - pandas.testing.assert_frame_equal(result, expected, rtol=0.1, check_dtype=False) - def test_columntransformer_standalone_fit_transform(new_penguins_df): transformer = compose.ColumnTransformer( @@ -86,25 +77,17 @@ def test_columntransformer_standalone_fit_transform(new_penguins_df): new_penguins_df[["species", "culmen_length_mm", "flipper_length_mm"]] ).to_pandas() - expected = pandas.DataFrame( - { - "onehotencoded_species": [ - [{"index": 1, "value": 1.0}], - [{"index": 1, "value": 1.0}], - [{"index": 2, "value": 1.0}], - ], - "standard_scaled_culmen_length_mm": [ - 1.313249, - -0.20198, - -1.111118, - ], - "standard_scaled_flipper_length_mm": [1.251098, -1.196588, -0.054338], - }, - index=pandas.Index([1633, 1672, 1690], dtype="Int64", name="tag_number"), + utils.check_pandas_df_schema_and_index( + result, + columns=[ + "onehotencoded_species", + "standard_scaled_culmen_length_mm", + "standard_scaled_flipper_length_mm", + ], + index=[1633, 1672, 1690], + col_exact=False, ) - pandas.testing.assert_frame_equal(result, expected, rtol=0.1, check_dtype=False) - def test_columntransformer_save_load(new_penguins_df, dataset_id): transformer = compose.ColumnTransformer( @@ -147,21 +130,13 @@ def test_columntransformer_save_load(new_penguins_df, dataset_id): new_penguins_df[["species", "culmen_length_mm", "flipper_length_mm"]] ).to_pandas() - expected = pandas.DataFrame( - { - "onehotencoded_species": [ - [{"index": 1, "value": 1.0}], - [{"index": 1, "value": 1.0}], - [{"index": 2, "value": 1.0}], - ], - "standard_scaled_culmen_length_mm": [ - 1.313249, - -0.20198, - -1.111118, - ], - "standard_scaled_flipper_length_mm": [1.251098, -1.196588, -0.054338], - }, - index=pandas.Index([1633, 1672, 1690], dtype="Int64", name="tag_number"), + utils.check_pandas_df_schema_and_index( + result, + columns=[ + "onehotencoded_species", + "standard_scaled_culmen_length_mm", + "standard_scaled_flipper_length_mm", + ], + index=[1633, 1672, 1690], + col_exact=False, ) - - pandas.testing.assert_frame_equal(result, expected, rtol=0.1, check_dtype=False) diff --git a/tests/system/large/ml/test_core.py b/tests/system/large/ml/test_core.py index aec1065e41..be5eea925f 100644 --- a/tests/system/large/ml/test_core.py +++ b/tests/system/large/ml/test_core.py @@ -12,14 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
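test_core.py below still spells out the six regression metrics inline as `eval_metrics`; the same list is added once as `utils.ML_REGRESSION_METRICS` further down in this patch, and the second commit moves the remaining callers onto that constant. A hedged sketch of the check those names drive (metric names are verbatim from the patch, the frame is illustrative):

import pandas as pd

ML_REGRESSION_METRICS = [
    "mean_absolute_error",
    "mean_squared_error",
    "mean_squared_log_error",
    "median_absolute_error",
    "r2_score",
    "explained_variance",
]

# evaluate() returns a single-row frame with one column per metric; the
# relaxed assertion cares about columns and row count, never the numbers.
evaluate_result = pd.DataFrame({metric: [0.0] for metric in ML_REGRESSION_METRICS})
assert list(evaluate_result.columns) == ML_REGRESSION_METRICS
assert len(evaluate_result) == 1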
-import pandas -import pytest - from bigframes.ml import globals +from tests.system import utils -# TODO(garrettwu): Re-enable or not check exact numbers. -@pytest.mark.skip(reason="bqml regression") def test_bqml_e2e(session, dataset_id, penguins_df_default_index, new_penguins_df): df = penguins_df_default_index.dropna() X_train = df[ @@ -38,41 +34,33 @@ def test_bqml_e2e(session, dataset_id, penguins_df_default_index, new_penguins_d X_train, y_train, options={"model_type": "linear_reg"} ) + eval_metrics = [ + "mean_absolute_error", + "mean_squared_error", + "mean_squared_log_error", + "median_absolute_error", + "r2_score", + "explained_variance", + ] # no data - report evaluation from the automatic data split evaluate_result = model.evaluate().to_pandas() - evaluate_expected = pandas.DataFrame( - { - "mean_absolute_error": [225.817334], - "mean_squared_error": [80540.705944], - "mean_squared_log_error": [0.004972], - "median_absolute_error": [173.080816], - "r2_score": [0.87529], - "explained_variance": [0.87529], - }, - dtype="Float64", - ) - evaluate_expected = evaluate_expected.reindex( - index=evaluate_expected.index.astype("Int64") - ) - pandas.testing.assert_frame_equal( - evaluate_result, evaluate_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + evaluate_result, columns=eval_metrics, index=1 ) # evaluate on all training data evaluate_result = model.evaluate(df).to_pandas() - pandas.testing.assert_frame_equal( - evaluate_result, evaluate_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + evaluate_result, columns=eval_metrics, index=1 ) # predict new labels predictions = model.predict(new_penguins_df).to_pandas() - expected = pandas.DataFrame( - {"predicted_body_mass_g": [4030.1, 3280.8, 3177.9]}, - dtype="Float64", - index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pandas.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + index=[1633, 1672, 1690], + col_exact=False, ) new_name = f"{dataset_id}.my_model" @@ -108,42 +96,34 @@ def test_bqml_manual_preprocessing_e2e( X_train, y_train, transforms=transforms, options=options ) + eval_metrics = [ + "mean_absolute_error", + "mean_squared_error", + "mean_squared_log_error", + "median_absolute_error", + "r2_score", + "explained_variance", + ] + # no data - report evaluation from the automatic data split evaluate_result = model.evaluate().to_pandas() - evaluate_expected = pandas.DataFrame( - { - "mean_absolute_error": [309.477334], - "mean_squared_error": [152184.227218], - "mean_squared_log_error": [0.009524], - "median_absolute_error": [257.727777], - "r2_score": [0.764356], - "explained_variance": [0.764356], - }, - dtype="Float64", - ) - evaluate_expected = evaluate_expected.reindex( - index=evaluate_expected.index.astype("Int64") - ) - - pandas.testing.assert_frame_equal( - evaluate_result, evaluate_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + evaluate_result, columns=eval_metrics, index=1 ) # evaluate on all training data evaluate_result = model.evaluate(df).to_pandas() - pandas.testing.assert_frame_equal( - evaluate_result, evaluate_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + evaluate_result, columns=eval_metrics, index=1 ) # predict new labels predictions = model.predict(new_penguins_df).to_pandas() - expected = 
pandas.DataFrame( - {"predicted_body_mass_g": [3968.8, 3176.3, 3545.2]}, - dtype="Float64", - index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pandas.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + index=[1633, 1672, 1690], + col_exact=False, ) new_name = f"{dataset_id}.my_model" @@ -168,24 +148,9 @@ def test_bqml_standalone_transform(penguins_df_default_index, new_penguins_df): ) transformed = model.transform(new_penguins_df).to_pandas() - expected = pandas.DataFrame( - { - "scaled_culmen_length_mm": [-0.8099, -0.9931, -1.103], - "onehotencoded_species": [ - [{"index": 1, "value": 1.0}], - [{"index": 1, "value": 1.0}], - [{"index": 2, "value": 1.0}], - ], - }, - index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - expected["scaled_culmen_length_mm"] = expected["scaled_culmen_length_mm"].astype( - "Float64" - ) - pandas.testing.assert_frame_equal( - transformed[["scaled_culmen_length_mm", "onehotencoded_species"]], - expected, - check_exact=False, - rtol=0.1, - check_dtype=False, + utils.check_pandas_df_schema_and_index( + transformed, + columns=["scaled_culmen_length_mm", "onehotencoded_species"], + index=[1633, 1672, 1690], + col_exact=False, ) diff --git a/tests/system/large/ml/test_decomposition.py b/tests/system/large/ml/test_decomposition.py index 264b95a92e..87af7255fb 100644 --- a/tests/system/large/ml/test_decomposition.py +++ b/tests/system/large/ml/test_decomposition.py @@ -15,7 +15,7 @@ import pandas as pd from bigframes.ml import decomposition -import tests.system.utils +from tests.system import utils def test_decomposition_configure_fit_score_predict( @@ -45,34 +45,19 @@ def test_decomposition_configure_fit_score_predict( # Check score to ensure the model was fitted score_result = model.score(new_penguins).to_pandas() - score_expected = pd.DataFrame( - { - "total_explained_variance_ratio": [0.812383], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=["total_explained_variance_ratio"], index=1 ) result = model.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - { - "principal_component_1": [-1.459, 2.258, -1.685], - "principal_component_2": [-1.120, -1.351, -0.874], - "principal_component_3": [-0.646, 0.443, -0.704], - }, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - - tests.system.utils.assert_pandas_df_equal_pca( + utils.check_pandas_df_schema_and_index( result, - expected, - check_exact=False, - rtol=0.1, + columns=[ + "principal_component_1", + "principal_component_2", + "principal_component_3", + ], + index=[1633, 1672, 1690], ) # save, load, check n_components to ensure configuration was kept @@ -113,36 +98,21 @@ def test_decomposition_configure_fit_score_predict_params( # Check score to ensure the model was fitted score_result = model.score(new_penguins).to_pandas() - score_expected = pd.DataFrame( - { - "total_explained_variance_ratio": [0.932897], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + 
utils.check_pandas_df_schema_and_index( + score_result, columns=["total_explained_variance_ratio"], index=1 ) result = model.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - { - "principal_component_1": [-1.459, 2.258, -1.685], - "principal_component_2": [-1.120, -1.351, -0.874], - "principal_component_3": [-0.646, 0.443, -0.704], - "principal_component_4": [-0.539, 0.234, -0.571], - "principal_component_5": [-0.876, 0.122, 0.609], - }, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - - tests.system.utils.assert_pandas_df_equal_pca( + utils.check_pandas_df_schema_and_index( result, - expected, - check_exact=False, - rtol=0.1, + columns=[ + "principal_component_1", + "principal_component_2", + "principal_component_3", + "principal_component_4", + "principal_component_5", + ], + index=[1633, 1672, 1690], ) # save, load, check n_components to ensure configuration was kept diff --git a/tests/system/large/ml/test_ensemble.py b/tests/system/large/ml/test_ensemble.py index 3d1fcaf41c..b8adfb36b2 100644 --- a/tests/system/large/ml/test_ensemble.py +++ b/tests/system/large/ml/test_ensemble.py @@ -12,17 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from unittest import TestCase - -import pandas import pytest import bigframes.ml.ensemble +from tests.system import utils -# TODO(garrettwu): Re-enable or not check exact numbers. -@pytest.mark.skip(reason="bqml regression") -@pytest.mark.flaky(retries=2) def test_xgbregressor_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.XGBRegressor() @@ -42,19 +37,28 @@ def test_xgbregressor_default_params(penguins_df_default_index, dataset_id): # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - expected = pandas.DataFrame( - { - "mean_absolute_error": [97.368139], - "mean_squared_error": [16284.877027], - "mean_squared_log_error": [0.0010189], - "median_absolute_error": [72.158691], - "r2_score": [0.974784], - "explained_variance": [0.974845], - }, - dtype="Float64", - ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pandas.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) + # expected = pandas.DataFrame( + # { + # "mean_absolute_error": [97.368139], + # "mean_squared_error": [16284.877027], + # "mean_squared_log_error": [0.0010189], + # "median_absolute_error": [72.158691], + # "r2_score": [0.974784], + # "explained_variance": [0.974845], + # }, + # dtype="Float64", + # ) + # expected = expected.reindex(index=expected.index.astype("Int64")) + # pandas.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) + eval_metrics = [ + "mean_absolute_error", + "mean_squared_error", + "mean_squared_log_error", + "median_absolute_error", + "r2_score", + "explained_variance", + ] + utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -66,7 +70,7 @@ def test_xgbregressor_default_params(penguins_df_default_index, dataset_id): ) -@pytest.mark.flaky(retries=2) +# @pytest.mark.flaky(retries=2) def test_xgbregressor_dart_booster_multiple_params( penguins_df_default_index, dataset_id ): @@ -103,16 +107,25 @@ def test_xgbregressor_dart_booster_multiple_params( # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - 
TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ + # TestCase().assertSequenceEqual(result.shape, (1, 6)) + # for col_name in [ + # "mean_absolute_error", + # "mean_squared_error", + # "mean_squared_log_error", + # "median_absolute_error", + # "r2_score", + # "explained_variance", + # ]: + # assert col_name in result.columns + eval_metrics = [ "mean_absolute_error", "mean_squared_error", "mean_squared_log_error", "median_absolute_error", "r2_score", "explained_variance", - ]: - assert col_name in result.columns + ] + utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -140,7 +153,7 @@ def test_xgbregressor_dart_booster_multiple_params( assert reloaded_model.n_estimators == 2 -@pytest.mark.flaky(retries=2) +# @pytest.mark.flaky(retries=2) def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.XGBClassifier() @@ -159,16 +172,25 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ + # TestCase().assertSequenceEqual(result.shape, (1, 6)) + # for col_name in [ + # "precision", + # "recall", + # "accuracy", + # "f1_score", + # "log_loss", + # "roc_auc", + # ]: + # assert col_name in result.columns + eval_metrics = [ "precision", "recall", "accuracy", "f1_score", "log_loss", "roc_auc", - ]: - assert col_name in result.columns + ] + utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -180,7 +202,7 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): ) -@pytest.mark.flaky(retries=2) +# @pytest.mark.flaky(retries=2) def test_xgbclassifier_dart_booster_multiple_params( penguins_df_default_index, dataset_id ): @@ -216,16 +238,25 @@ def test_xgbclassifier_dart_booster_multiple_params( # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ + # TestCase().assertSequenceEqual(result.shape, (1, 6)) + # for col_name in [ + # "precision", + # "recall", + # "accuracy", + # "f1_score", + # "log_loss", + # "roc_auc", + # ]: + # assert col_name in result.columns + eval_metrics = [ "precision", "recall", "accuracy", "f1_score", "log_loss", "roc_auc", - ]: - assert col_name in result.columns + ] + utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -253,7 +284,7 @@ def test_xgbclassifier_dart_booster_multiple_params( assert reloaded_model.n_estimators == 2 -@pytest.mark.flaky(retries=2) +# @pytest.mark.flaky(retries=2) def test_randomforestregressor_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestRegressor() @@ -273,16 +304,25 @@ def test_randomforestregressor_default_params(penguins_df_default_index, dataset # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ + # TestCase().assertSequenceEqual(result.shape, (1, 6)) + # for col_name in [ + # "mean_absolute_error", + # 
"mean_squared_error", + # "mean_squared_log_error", + # "median_absolute_error", + # "r2_score", + # "explained_variance", + # ]: + # assert col_name in result.columns + eval_metrics = [ "mean_absolute_error", "mean_squared_error", "mean_squared_log_error", "median_absolute_error", "r2_score", "explained_variance", - ]: - assert col_name in result.columns + ] + utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -294,7 +334,7 @@ def test_randomforestregressor_default_params(penguins_df_default_index, dataset ) -@pytest.mark.flaky(retries=2) +# @pytest.mark.flaky(retries=2) def test_randomforestregressor_multiple_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestRegressor( tree_method="auto", @@ -326,16 +366,25 @@ def test_randomforestregressor_multiple_params(penguins_df_default_index, datase # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ + # TestCase().assertSequenceEqual(result.shape, (1, 6)) + # for col_name in [ + # "mean_absolute_error", + # "mean_squared_error", + # "mean_squared_log_error", + # "median_absolute_error", + # "r2_score", + # "explained_variance", + # ]: + # assert col_name in result.columns + eval_metrics = [ "mean_absolute_error", "mean_squared_error", "mean_squared_log_error", "median_absolute_error", "r2_score", "explained_variance", - ]: - assert col_name in result.columns + ] + utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -360,7 +409,7 @@ def test_randomforestregressor_multiple_params(penguins_df_default_index, datase assert reloaded_model.enable_global_explain is False -@pytest.mark.flaky(retries=2) +# @pytest.mark.flaky(retries=2) def test_randomforestclassifier_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestClassifier() @@ -379,16 +428,25 @@ def test_randomforestclassifier_default_params(penguins_df_default_index, datase # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ + # TestCase().assertSequenceEqual(result.shape, (1, 6)) + # for col_name in [ + # "precision", + # "recall", + # "accuracy", + # "f1_score", + # "log_loss", + # "roc_auc", + # ]: + # assert col_name in result.columns + eval_metrics = [ "precision", "recall", "accuracy", "f1_score", "log_loss", "roc_auc", - ]: - assert col_name in result.columns + ] + utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -431,16 +489,25 @@ def test_randomforestclassifier_multiple_params(penguins_df_default_index, datas # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ + # TestCase().assertSequenceEqual(result.shape, (1, 6)) + # for col_name in [ + # "precision", + # "recall", + # "accuracy", + # "f1_score", + # "log_loss", + # "roc_auc", + # ]: + # assert col_name in result.columns + eval_metrics = [ "precision", "recall", "accuracy", "f1_score", "log_loss", "roc_auc", - ]: - assert col_name in 
result.columns + ] + utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( diff --git a/tests/system/large/ml/test_forecasting.py b/tests/system/large/ml/test_forecasting.py index ef74398c2e..74ba12c6c6 100644 --- a/tests/system/large/ml/test_forecasting.py +++ b/tests/system/large/ml/test_forecasting.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pandas as pd import pytest from bigframes.ml import forecasting +from tests.system import utils ARIMA_EVALUATE_OUTPUT_COL = [ "non_seasonal_p", "non_seasonal_d", "non_seasonal_q", + "has_drift", "log_likelihood", "AIC", "variance", @@ -50,18 +51,28 @@ def test_arima_plus_model_fit_score( result = arima_model.score( new_time_series_df[["parsed_date"]], new_time_series_df[["total_visits"]] ).to_pandas() - expected = pd.DataFrame( - { - "mean_absolute_error": [154.742547], - "mean_squared_error": [26844.868855], - "root_mean_squared_error": [163.844038], - "mean_absolute_percentage_error": [6.189702], - "symmetric_mean_absolute_percentage_error": [6.097155], - }, - dtype="Float64", + # expected = pd.DataFrame( + # { + # "mean_absolute_error": [154.742547], + # "mean_squared_error": [26844.868855], + # "root_mean_squared_error": [163.844038], + # "mean_absolute_percentage_error": [6.189702], + # "symmetric_mean_absolute_percentage_error": [6.097155], + # }, + # dtype="Float64", + # ) + # expected = expected.reindex(index=expected.index.astype("Int64")) + utils.check_pandas_df_schema_and_index( + result, + columns=[ + "mean_absolute_error", + "mean_squared_error", + "root_mean_squared_error", + "mean_absolute_percentage_error", + "symmetric_mean_absolute_percentage_error", + ], + index=1, ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load to ensure configuration was kept reloaded_model = arima_model.to_gbq( @@ -73,10 +84,10 @@ def test_arima_plus_model_fit_score( def test_arima_plus_model_fit_summary(dataset_id, arima_model): - - result = arima_model.summary() - assert result.shape == (1, 12) - assert all(column in result.columns for column in ARIMA_EVALUATE_OUTPUT_COL) + result = arima_model.summary().to_pandas() + utils.check_pandas_df_schema_and_index( + result, columns=ARIMA_EVALUATE_OUTPUT_COL, index=1 + ) # save, load to ensure configuration was kept reloaded_model = arima_model.to_gbq( @@ -88,13 +99,13 @@ def test_arima_plus_model_fit_summary(dataset_id, arima_model): def test_arima_coefficients(arima_model): - got = arima_model.coef_ - expected_columns = { + result = arima_model.coef_.to_pandas() + expected_columns = [ "ar_coefficients", "ma_coefficients", "intercept_or_drift", - } - assert set(got.columns) == expected_columns + ] + utils.check_pandas_df_schema_and_index(result, columns=expected_columns, index=1) def test_arima_plus_model_fit_params(time_series_df_default_index, dataset_id): diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py index 99121e4a31..f508edfa9e 100644 --- a/tests/system/large/ml/test_linear_model.py +++ b/tests/system/large/ml/test_linear_model.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
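Most predict() checks in this series pass `col_exact=False`, because BQML prediction output carries the input pass-through columns alongside the predicted one. A small sketch of the two matching modes, mirroring the helper added in tests/system/utils.py (frame contents are illustrative):

import pandas as pd

predictions = pd.DataFrame(
    {
        "predicted_body_mass_g": [4030.1, 3280.8, 3177.9],
        "culmen_length_mm": [39.5, 38.5, 37.9],  # pass-through feature column
    },
    index=pd.Index([1633, 1672, 1690], name="tag_number"),
)

# col_exact=True would demand an exact column list and fail on the extra
# pass-through column; col_exact=False only requires a subset:
assert set(["predicted_body_mass_g"]) <= set(predictions.columns)
# index given as a list pins the index values (and their order):
assert list(predictions.index) == [1633, 1672, 1690]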
-import pandas as pd - import bigframes.ml.linear_model +from tests.system import utils def test_linear_regression_configure_fit_score(penguins_df_default_index, dataset_id): @@ -36,19 +35,21 @@ def test_linear_regression_configure_fit_score(penguins_df_default_index, datase # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - expected = pd.DataFrame( - { - "mean_absolute_error": [225.735767], - "mean_squared_error": [80417.461828], - "mean_squared_log_error": [0.004967], - "median_absolute_error": [172.543702], - "r2_score": [0.87548], - "explained_variance": [0.87548], - }, - dtype="Float64", + # expected = pd.DataFrame( + # { + # "mean_absolute_error": [225.735767], + # "mean_squared_error": [80417.461828], + # "mean_squared_log_error": [0.004967], + # "median_absolute_error": [172.543702], + # "r2_score": [0.87548], + # "explained_variance": [0.87548], + # }, + # dtype="Float64", + # ) + # expected = expected.reindex(index=expected.index.astype("Int64")) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq(f"{dataset_id}.temp_configured_model", replace=True) @@ -98,19 +99,21 @@ def test_linear_regression_customized_params_fit_score( # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - expected = pd.DataFrame( - { - "mean_absolute_error": [240], - "mean_squared_error": [91197], - "mean_squared_log_error": [0.00573], - "median_absolute_error": [197], - "r2_score": [0.858], - "explained_variance": [0.8588], - }, - dtype="Float64", + # expected = pd.DataFrame( + # { + # "mean_absolute_error": [240], + # "mean_squared_error": [91197], + # "mean_squared_log_error": [0.00573], + # "median_absolute_error": [197], + # "r2_score": [0.858], + # "explained_variance": [0.8588], + # }, + # dtype="Float64", + # ) + # expected = expected.reindex(index=expected.index.astype("Int64")) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq(f"{dataset_id}.temp_configured_model", replace=True) @@ -152,19 +155,21 @@ def test_logistic_regression_configure_fit_score(penguins_df_default_index, data # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - expected = pd.DataFrame( - { - "precision": [0.616753], - "recall": [0.618615], - "accuracy": [0.92515], - "f1_score": [0.617681], - "log_loss": [1.498832], - "roc_auc": [0.975807], - }, - dtype="Float64", + # expected = pd.DataFrame( + # { + # "precision": [0.616753], + # "recall": [0.618615], + # "accuracy": [0.92515], + # "f1_score": [0.617681], + # "log_loss": [1.498832], + # "roc_auc": [0.975807], + # }, + # dtype="Float64", + # ) + # expected = expected.reindex(index=expected.index.astype("Int64")) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load, check 
parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -207,19 +212,21 @@ def test_logistic_regression_customized_params_fit_score( # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - expected = pd.DataFrame( - { - "precision": [0.487], - "recall": [0.602], - "accuracy": [0.464], - "f1_score": [0.379], - "log_loss": [0.972], - "roc_auc": [0.700], - }, - dtype="Float64", + # expected = pd.DataFrame( + # { + # "precision": [0.487], + # "recall": [0.602], + # "accuracy": [0.464], + # "f1_score": [0.379], + # "log_loss": [0.972], + # "roc_auc": [0.700], + # }, + # dtype="Float64", + # ) + # expected = expected.reindex(index=expected.index.astype("Int64")) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py index 1a92d0f7d4..1a241315bc 100644 --- a/tests/system/large/ml/test_pipeline.py +++ b/tests/system/large/ml/test_pipeline.py @@ -24,7 +24,7 @@ pipeline, preprocessing, ) -from tests.system.utils import assert_pandas_df_equal, assert_pandas_df_equal_pca +from tests.system import utils def test_pipeline_linear_regression_fit_score_predict( @@ -51,21 +51,21 @@ def test_pipeline_linear_regression_fit_score_predict( # Check score to ensure the model was fitted score_result = pl.score(X_train, y_train).to_pandas() - score_expected = pd.DataFrame( - { - "mean_absolute_error": [309.477331], - "mean_squared_error": [152184.227219], - "mean_squared_log_error": [0.009524], - "median_absolute_error": [257.728263], - "r2_score": [0.764356], - "explained_variance": [0.764356], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + # score_expected = pd.DataFrame( + # { + # "mean_absolute_error": [309.477331], + # "mean_squared_error": [152184.227219], + # "mean_squared_log_error": [0.009524], + # "median_absolute_error": [257.728263], + # "r2_score": [0.764356], + # "explained_variance": [0.764356], + # }, + # dtype="Float64", + # ) + # score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) + + utils.check_pandas_df_schema_and_index( + score_result, columns=utils.ML_REGRESSION_METRICS, index=1 ) # predict new labels @@ -87,13 +87,19 @@ def test_pipeline_linear_regression_fit_score_predict( ).set_index("tag_number") ) predictions = pl.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - {"predicted_body_mass_g": [3968.8, 3176.3, 3545.2]}, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + # expected = pd.DataFrame( + # {"predicted_body_mass_g": [3968.8, 3176.3, 3545.2]}, + # dtype="Float64", + # index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + # ) + # pd.testing.assert_frame_equal( + # predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + # ) + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + index=[1633, 1672, 1690], + 
col_exact=False, ) @@ -115,21 +121,24 @@ def test_pipeline_linear_regression_series_fit_score_predict( # Check score to ensure the model was fitted score_result = pl.score(X_train, y_train).to_pandas() - score_expected = pd.DataFrame( - { - "mean_absolute_error": [528.495599], - "mean_squared_error": [421722.261808], - "mean_squared_log_error": [0.022963], - "median_absolute_error": [468.895249], - "r2_score": [0.346999], - "explained_variance": [0.346999], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + # score_expected = pd.DataFrame( + # { + # "mean_absolute_error": [528.495599], + # "mean_squared_error": [421722.261808], + # "mean_squared_log_error": [0.022963], + # "median_absolute_error": [468.895249], + # "r2_score": [0.346999], + # "explained_variance": [0.346999], + # }, + # dtype="Float64", + # ) + # score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) + + # pd.testing.assert_frame_equal( + # score_result, score_expected, check_exact=False, rtol=0.1 + # ) + utils.check_pandas_df_schema_and_index( + score_result, columns=utils.ML_REGRESSION_METRICS, index=1 ) # predict new labels @@ -142,13 +151,19 @@ def test_pipeline_linear_regression_series_fit_score_predict( ).set_index("tag_number") ) predictions = pl.predict(new_penguins["culmen_length_mm"]).to_pandas() - expected = pd.DataFrame( - {"predicted_body_mass_g": [3818.845703, 3732.022253, 3679.928123]}, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + # expected = pd.DataFrame( + # {"predicted_body_mass_g": [3818.845703, 3732.022253, 3679.928123]}, + # dtype="Float64", + # index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + # ) + # pd.testing.assert_frame_equal( + # predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + # ) + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + index=[1633, 1672, 1690], + col_exact=False, ) @@ -176,21 +191,24 @@ def test_pipeline_logistic_regression_fit_score_predict( # Check score to ensure the model was fitted score_result = pl.score(X_train, y_train).to_pandas() - score_expected = pd.DataFrame( - { - "precision": [0.537091], - "recall": [0.538636], - "accuracy": [0.805389], - "f1_score": [0.537716], - "log_loss": [1.445433], - "roc_auc": [0.917818], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + # score_expected = pd.DataFrame( + # { + # "precision": [0.537091], + # "recall": [0.538636], + # "accuracy": [0.805389], + # "f1_score": [0.537716], + # "log_loss": [1.445433], + # "roc_auc": [0.917818], + # }, + # dtype="Float64", + # ) + # score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) + + # pd.testing.assert_frame_equal( + # score_result, score_expected, check_exact=False, rtol=0.1 + # ) + utils.check_pandas_df_schema_and_index( + score_result, columns=utils.ML_CLASSFICATION_METRICS, index=1 ) # predict new labels @@ -211,19 +229,23 @@ def test_pipeline_logistic_regression_fit_score_predict( ).set_index("tag_number") ) predictions = pl.predict(new_penguins).to_pandas() 
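The `index` argument in these calls is overloaded: an int is the expected row count, while an iterable is matched against the index values themselves. A sketch of that dispatch, consistent with the helper definition at the end of this patch:

from typing import Iterable, Union

import pandas as pd

def _check_index(actual: pd.DataFrame, index: Union[int, Iterable]) -> None:
    # int -> row-count check (score/evaluate results); iterable -> exact
    # index-value check (predict/transform results).
    if isinstance(index, int):
        assert len(actual) == index
    elif isinstance(index, Iterable):
        assert list(actual.index) == list(index)
    else:
        raise ValueError("Unsupported index type.")

_check_index(pd.DataFrame({"m": [0.5]}), 1)                          # one metrics row
_check_index(pd.DataFrame({"p": [1, 2]}, index=[10, 20]), [10, 20])  # exact index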
- expected = pd.DataFrame( - {"predicted_sex": ["MALE", "FEMALE", "FEMALE"]}, - dtype=pd.StringDtype(storage="pyarrow"), - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_sex"]], - expected, + # expected = pd.DataFrame( + # {"predicted_sex": ["MALE", "FEMALE", "FEMALE"]}, + # dtype=pd.StringDtype(storage="pyarrow"), + # index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + # ) + # pd.testing.assert_frame_equal( + # predictions[["predicted_sex"]], + # expected, + # ) + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_sex"], + index=[1633, 1672, 1690], + col_exact=False, ) -# TODO(garrettwu): Re-enable or not check exact numbers. -@pytest.mark.skip(reason="bqml regression") @pytest.mark.flaky(retries=2) def test_pipeline_xgbregressor_fit_score_predict(session, penguins_df_default_index): """Test a supervised model with a minimal preprocessing step""" @@ -247,21 +269,8 @@ def test_pipeline_xgbregressor_fit_score_predict(session, penguins_df_default_in # Check score to ensure the model was fitted score_result = pl.score(X_train, y_train).to_pandas() - score_expected = pd.DataFrame( - { - "mean_absolute_error": [202.298434], - "mean_squared_error": [74515.108971], - "mean_squared_log_error": [0.004365], - "median_absolute_error": [142.949219], - "r2_score": [0.88462], - "explained_variance": [0.886454], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=utils.ML_REGRESSION_METRICS, index=1 ) # predict new labels @@ -283,24 +292,14 @@ def test_pipeline_xgbregressor_fit_score_predict(session, penguins_df_default_in ).set_index("tag_number") ) predictions = pl.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - { - "predicted_body_mass_g": [ - 4287.34521484375, - 3198.351806640625, - 3385.34130859375, - ] - }, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + index=[1633, 1672, 1690], + col_exact=False, ) -# TODO(garrettwu): Re-enable or not check exact numbers. 
-@pytest.mark.skip(reason="bqml regression") @pytest.mark.flaky(retries=2) def test_pipeline_random_forest_classifier_fit_score_predict( session, penguins_df_default_index @@ -326,21 +325,8 @@ def test_pipeline_random_forest_classifier_fit_score_predict( # Check score to ensure the model was fitted score_result = pl.score(X_train, y_train).to_pandas() - score_expected = pd.DataFrame( - { - "precision": [0.585505], - "recall": [0.58676], - "accuracy": [0.877246], - "f1_score": [0.585657], - "log_loss": [0.880643], - "roc_auc": [0.970697], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=utils.ML_CLASSFICATION_METRICS, index=1 ) # predict new labels @@ -361,14 +347,11 @@ def test_pipeline_random_forest_classifier_fit_score_predict( ).set_index("tag_number") ) predictions = pl.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - {"predicted_sex": ["MALE", "FEMALE", "FEMALE"]}, - dtype=pd.StringDtype(storage="pyarrow"), - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_sex"]], - expected, + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_sex"], + index=[1633, 1672, 1690], + col_exact=False, ) @@ -412,40 +395,20 @@ def test_pipeline_PCA_fit_score_predict(session, penguins_df_default_index): # Check score to ensure the model was fitted score_result = pl.score(new_penguins).to_pandas() - score_expected = pd.DataFrame( - { - "total_explained_variance_ratio": [1.0], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=["total_explained_variance_ratio"], index=1 ) predictions = pl.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - { - "principal_component_1": [-1.115259, -1.506141, -1.471173], - "principal_component_2": [-0.074825, 0.69664, 0.406103], - "principal_component_3": [0.500013, -0.544479, 0.075849], - }, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - - assert_pandas_df_equal_pca( - predictions[ - [ - "principal_component_1", - "principal_component_2", - "principal_component_3", - ] + utils.check_pandas_df_schema_and_index( + predictions, + columns=[ + "principal_component_1", + "principal_component_2", + "principal_component_3", ], - expected, - check_exact=False, - rtol=0.1, + index=[1633, 1672, 1690], + col_exact=False, ) @@ -538,29 +501,16 @@ def test_pipeline_standard_scaler_kmeans_fit_score_predict( # Check score to ensure the model was fitted score_result = pl.score(new_penguins).to_pandas() - score_expected = pd.DataFrame( - {"davies_bouldin_index": [7.542981], "mean_squared_distance": [94.692409]}, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 - ) + eval_metrics = ["davies_bouldin_index", "mean_squared_distance"] + utils.check_pandas_df_schema_and_index(score_result, columns=eval_metrics, index=1) predictions = pl.predict(new_penguins).to_pandas().sort_index() - assert predictions.shape == (6, 9) - 
result = predictions[["CENTROID_ID"]] - expected = pd.DataFrame( - {"CENTROID_ID": [1, 2, 1, 2, 1, 2]}, - dtype="Int64", - index=pd.Index( - ["test1", "test2", "test3", "test4", "test5", "test6"], - dtype="string[pyarrow]", - ), + utils.check_pandas_df_schema_and_index( + predictions, + columns=["CENTROID_ID"], + index=["test1", "test2", "test3", "test4", "test5", "test6"], + col_exact=False, ) - expected.index.name = "observation" - assert_pandas_df_equal(result, expected, ignore_order=True) def test_pipeline_columntransformer_fit_predict(session, penguins_df_default_index): @@ -632,13 +582,11 @@ def test_pipeline_columntransformer_fit_predict(session, penguins_df_default_ind ).set_index("tag_number") ) predictions = pl.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - {"predicted_body_mass_g": [3909.2, 3436.0, 2860.0]}, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + index=[1633, 1672, 1690], + col_exact=False, ) diff --git a/tests/system/utils.py b/tests/system/utils.py index e40502e6f2..75897b7942 100644 --- a/tests/system/utils.py +++ b/tests/system/utils.py @@ -15,7 +15,7 @@ import base64 import decimal import functools -from typing import Iterable, Optional, Set +from typing import Iterable, Optional, Set, Union import geopandas as gpd # type: ignore import google.api_core.operation @@ -28,6 +28,23 @@ from bigframes.functions import remote_function +ML_REGRESSION_METRICS = [ + "mean_absolute_error", + "mean_squared_error", + "mean_squared_log_error", + "median_absolute_error", + "r2_score", + "explained_variance", +] +ML_CLASSFICATION_METRICS = [ + "precision", + "recall", + "accuracy", + "f1_score", + "log_loss", + "roc_auc", +] + def skip_legacy_pandas(test): @functools.wraps(test) @@ -249,6 +266,26 @@ def assert_pandas_df_equal_pca(actual, expected, **kwargs): pd.testing.assert_series_equal(-actual[column], expected[column], **kwargs) +def check_pandas_df_schema_and_index( + actual: pd.DataFrame, + columns: Iterable, + index: Union[int, Iterable], + col_exact: bool = True, +): + """Check pandas df schema and index. 
But not the values.""" + if col_exact: + assert list(actual.columns) == list(columns) + else: + assert set(columns) <= set(actual.columns) + + if isinstance(index, int): + assert len(actual) == index + elif isinstance(index, Iterable): + assert list(actual.index) == list(index) + else: + raise ValueError("Unsupported index type.") + + def get_remote_function_endpoints( bigquery_client: bigquery.Client, dataset_id: str ) -> Set[str]: From c8846f117f6c8a01cddc69c8514b432c0827be91 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 15 May 2024 17:41:23 +0000 Subject: [PATCH 2/2] clean up --- tests/system/large/ml/test_ensemble.py | 190 ++++----------------- tests/system/large/ml/test_forecasting.py | 11 -- tests/system/large/ml/test_linear_model.py | 48 ------ tests/system/large/ml/test_pipeline.py | 70 -------- tests/system/utils.py | 19 ++- 5 files changed, 43 insertions(+), 295 deletions(-) diff --git a/tests/system/large/ml/test_ensemble.py b/tests/system/large/ml/test_ensemble.py index b8adfb36b2..36c0e6cb17 100644 --- a/tests/system/large/ml/test_ensemble.py +++ b/tests/system/large/ml/test_ensemble.py @@ -18,6 +18,7 @@ from tests.system import utils +@pytest.mark.flaky(retries=2) def test_xgbregressor_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.XGBRegressor() @@ -37,28 +38,9 @@ def test_xgbregressor_default_params(penguins_df_default_index, dataset_id): # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - # expected = pandas.DataFrame( - # { - # "mean_absolute_error": [97.368139], - # "mean_squared_error": [16284.877027], - # "mean_squared_log_error": [0.0010189], - # "median_absolute_error": [72.158691], - # "r2_score": [0.974784], - # "explained_variance": [0.974845], - # }, - # dtype="Float64", - # ) - # expected = expected.reindex(index=expected.index.astype("Int64")) - # pandas.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) - eval_metrics = [ - "mean_absolute_error", - "mean_squared_error", - "mean_squared_log_error", - "median_absolute_error", - "r2_score", - "explained_variance", - ] - utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -70,7 +52,7 @@ def test_xgbregressor_default_params(penguins_df_default_index, dataset_id): ) -# @pytest.mark.flaky(retries=2) +@pytest.mark.flaky(retries=2) def test_xgbregressor_dart_booster_multiple_params( penguins_df_default_index, dataset_id ): @@ -107,25 +89,9 @@ def test_xgbregressor_dart_booster_multiple_params( # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - # TestCase().assertSequenceEqual(result.shape, (1, 6)) - # for col_name in [ - # "mean_absolute_error", - # "mean_squared_error", - # "mean_squared_log_error", - # "median_absolute_error", - # "r2_score", - # "explained_variance", - # ]: - # assert col_name in result.columns - eval_metrics = [ - "mean_absolute_error", - "mean_squared_error", - "mean_squared_log_error", - "median_absolute_error", - "r2_score", - "explained_variance", - ] - utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = 
model.to_gbq( @@ -153,7 +119,7 @@ def test_xgbregressor_dart_booster_multiple_params( assert reloaded_model.n_estimators == 2 -# @pytest.mark.flaky(retries=2) +@pytest.mark.flaky(retries=2) def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.XGBClassifier() @@ -172,25 +138,9 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - # TestCase().assertSequenceEqual(result.shape, (1, 6)) - # for col_name in [ - # "precision", - # "recall", - # "accuracy", - # "f1_score", - # "log_loss", - # "roc_auc", - # ]: - # assert col_name in result.columns - eval_metrics = [ - "precision", - "recall", - "accuracy", - "f1_score", - "log_loss", - "roc_auc", - ] - utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -238,25 +188,9 @@ def test_xgbclassifier_dart_booster_multiple_params( # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - # TestCase().assertSequenceEqual(result.shape, (1, 6)) - # for col_name in [ - # "precision", - # "recall", - # "accuracy", - # "f1_score", - # "log_loss", - # "roc_auc", - # ]: - # assert col_name in result.columns - eval_metrics = [ - "precision", - "recall", - "accuracy", - "f1_score", - "log_loss", - "roc_auc", - ] - utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -284,7 +218,7 @@ def test_xgbclassifier_dart_booster_multiple_params( assert reloaded_model.n_estimators == 2 -# @pytest.mark.flaky(retries=2) +@pytest.mark.flaky(retries=2) def test_randomforestregressor_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestRegressor() @@ -304,25 +238,9 @@ def test_randomforestregressor_default_params(penguins_df_default_index, dataset # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - # TestCase().assertSequenceEqual(result.shape, (1, 6)) - # for col_name in [ - # "mean_absolute_error", - # "mean_squared_error", - # "mean_squared_log_error", - # "median_absolute_error", - # "r2_score", - # "explained_variance", - # ]: - # assert col_name in result.columns - eval_metrics = [ - "mean_absolute_error", - "mean_squared_error", - "mean_squared_log_error", - "median_absolute_error", - "r2_score", - "explained_variance", - ] - utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -334,7 +252,7 @@ def test_randomforestregressor_default_params(penguins_df_default_index, dataset ) -# @pytest.mark.flaky(retries=2) +@pytest.mark.flaky(retries=2) def test_randomforestregressor_multiple_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestRegressor( tree_method="auto", @@ -366,25 +284,9 @@ def test_randomforestregressor_multiple_params(penguins_df_default_index, datase # Check 
score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - # TestCase().assertSequenceEqual(result.shape, (1, 6)) - # for col_name in [ - # "mean_absolute_error", - # "mean_squared_error", - # "mean_squared_log_error", - # "median_absolute_error", - # "r2_score", - # "explained_variance", - # ]: - # assert col_name in result.columns - eval_metrics = [ - "mean_absolute_error", - "mean_squared_error", - "mean_squared_log_error", - "median_absolute_error", - "r2_score", - "explained_variance", - ] - utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -409,7 +311,7 @@ def test_randomforestregressor_multiple_params(penguins_df_default_index, datase assert reloaded_model.enable_global_explain is False -# @pytest.mark.flaky(retries=2) +@pytest.mark.flaky(retries=2) def test_randomforestclassifier_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.RandomForestClassifier() @@ -428,25 +330,9 @@ def test_randomforestclassifier_default_params(penguins_df_default_index, datase # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - # TestCase().assertSequenceEqual(result.shape, (1, 6)) - # for col_name in [ - # "precision", - # "recall", - # "accuracy", - # "f1_score", - # "log_loss", - # "roc_auc", - # ]: - # assert col_name in result.columns - eval_metrics = [ - "precision", - "recall", - "accuracy", - "f1_score", - "log_loss", - "roc_auc", - ] - utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -489,25 +375,9 @@ def test_randomforestclassifier_multiple_params(penguins_df_default_index, datas # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - # TestCase().assertSequenceEqual(result.shape, (1, 6)) - # for col_name in [ - # "precision", - # "recall", - # "accuracy", - # "f1_score", - # "log_loss", - # "roc_auc", - # ]: - # assert col_name in result.columns - eval_metrics = [ - "precision", - "recall", - "accuracy", - "f1_score", - "log_loss", - "roc_auc", - ] - utils.check_pandas_df_schema_and_index(result, columns=eval_metrics, index=1) + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( diff --git a/tests/system/large/ml/test_forecasting.py b/tests/system/large/ml/test_forecasting.py index 74ba12c6c6..57137ea64f 100644 --- a/tests/system/large/ml/test_forecasting.py +++ b/tests/system/large/ml/test_forecasting.py @@ -51,17 +51,6 @@ def test_arima_plus_model_fit_score( result = arima_model.score( new_time_series_df[["parsed_date"]], new_time_series_df[["total_visits"]] ).to_pandas() - # expected = pd.DataFrame( - # { - # "mean_absolute_error": [154.742547], - # "mean_squared_error": [26844.868855], - # "root_mean_squared_error": [163.844038], - # "mean_absolute_percentage_error": [6.189702], - # "symmetric_mean_absolute_percentage_error": [6.097155], - # }, - # dtype="Float64", - # ) - # expected = expected.reindex(index=expected.index.astype("Int64")) 
     utils.check_pandas_df_schema_and_index(
         result,
         columns=[
diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py
index f508edfa9e..69193adfa7 100644
--- a/tests/system/large/ml/test_linear_model.py
+++ b/tests/system/large/ml/test_linear_model.py
@@ -35,18 +35,6 @@ def test_linear_regression_configure_fit_score(penguins_df_default_index, datase

     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    # expected = pd.DataFrame(
-    #     {
-    #         "mean_absolute_error": [225.735767],
-    #         "mean_squared_error": [80417.461828],
-    #         "mean_squared_log_error": [0.004967],
-    #         "median_absolute_error": [172.543702],
-    #         "r2_score": [0.87548],
-    #         "explained_variance": [0.87548],
-    #     },
-    #     dtype="Float64",
-    # )
-    # expected = expected.reindex(index=expected.index.astype("Int64"))
     utils.check_pandas_df_schema_and_index(
         result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
@@ -99,18 +87,6 @@ def test_linear_regression_customized_params_fit_score(

     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    # expected = pd.DataFrame(
-    #     {
-    #         "mean_absolute_error": [240],
-    #         "mean_squared_error": [91197],
-    #         "mean_squared_log_error": [0.00573],
-    #         "median_absolute_error": [197],
-    #         "r2_score": [0.858],
-    #         "explained_variance": [0.8588],
-    #     },
-    #     dtype="Float64",
-    # )
-    # expected = expected.reindex(index=expected.index.astype("Int64"))
     utils.check_pandas_df_schema_and_index(
         result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
@@ -155,18 +131,6 @@ def test_logistic_regression_configure_fit_score(penguins_df_default_index, data

     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    # expected = pd.DataFrame(
-    #     {
-    #         "precision": [0.616753],
-    #         "recall": [0.618615],
-    #         "accuracy": [0.92515],
-    #         "f1_score": [0.617681],
-    #         "log_loss": [1.498832],
-    #         "roc_auc": [0.975807],
-    #     },
-    #     dtype="Float64",
-    # )
-    # expected = expected.reindex(index=expected.index.astype("Int64"))
     utils.check_pandas_df_schema_and_index(
         result, columns=utils.ML_CLASSFICATION_METRICS, index=1
     )
@@ -212,18 +176,6 @@ def test_logistic_regression_customized_params_fit_score(

     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
-    # expected = pd.DataFrame(
-    #     {
-    #         "precision": [0.487],
-    #         "recall": [0.602],
-    #         "accuracy": [0.464],
-    #         "f1_score": [0.379],
-    #         "log_loss": [0.972],
-    #         "roc_auc": [0.700],
-    #     },
-    #     dtype="Float64",
-    # )
-    # expected = expected.reindex(index=expected.index.astype("Int64"))
     utils.check_pandas_df_schema_and_index(
         result, columns=utils.ML_CLASSFICATION_METRICS, index=1
     )
diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py
index 1a241315bc..3d7eb2e426 100644
--- a/tests/system/large/ml/test_pipeline.py
+++ b/tests/system/large/ml/test_pipeline.py
@@ -51,19 +51,6 @@ def test_pipeline_linear_regression_fit_score_predict(

     # Check score to ensure the model was fitted
     score_result = pl.score(X_train, y_train).to_pandas()
-    # score_expected = pd.DataFrame(
-    #     {
-    #         "mean_absolute_error": [309.477331],
-    #         "mean_squared_error": [152184.227219],
-    #         "mean_squared_log_error": [0.009524],
-    #         "median_absolute_error": [257.728263],
-    #         "r2_score": [0.764356],
-    #         "explained_variance": [0.764356],
-    #     },
-    #     dtype="Float64",
-    # )
-    # score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
     utils.check_pandas_df_schema_and_index(
         score_result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
@@ -87,14 +74,6 @@ def test_pipeline_linear_regression_fit_score_predict(
         ).set_index("tag_number")
     )
     predictions = pl.predict(new_penguins).to_pandas()
-    # expected = pd.DataFrame(
-    #     {"predicted_body_mass_g": [3968.8, 3176.3, 3545.2]},
-    #     dtype="Float64",
-    #     index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    # )
-    # pd.testing.assert_frame_equal(
-    #     predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1
-    # )
     utils.check_pandas_df_schema_and_index(
         predictions,
         columns=["predicted_body_mass_g"],
@@ -121,22 +100,6 @@ def test_pipeline_linear_regression_series_fit_score_predict(

     # Check score to ensure the model was fitted
     score_result = pl.score(X_train, y_train).to_pandas()
-    # score_expected = pd.DataFrame(
-    #     {
-    #         "mean_absolute_error": [528.495599],
-    #         "mean_squared_error": [421722.261808],
-    #         "mean_squared_log_error": [0.022963],
-    #         "median_absolute_error": [468.895249],
-    #         "r2_score": [0.346999],
-    #         "explained_variance": [0.346999],
-    #     },
-    #     dtype="Float64",
-    # )
-    # score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    # pd.testing.assert_frame_equal(
-    #     score_result, score_expected, check_exact=False, rtol=0.1
-    # )
     utils.check_pandas_df_schema_and_index(
         score_result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
@@ -151,14 +114,6 @@ def test_pipeline_linear_regression_series_fit_score_predict(
         ).set_index("tag_number")
     )
     predictions = pl.predict(new_penguins["culmen_length_mm"]).to_pandas()
-    # expected = pd.DataFrame(
-    #     {"predicted_body_mass_g": [3818.845703, 3732.022253, 3679.928123]},
-    #     dtype="Float64",
-    #     index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    # )
-    # pd.testing.assert_frame_equal(
-    #     predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1
-    # )
     utils.check_pandas_df_schema_and_index(
         predictions,
         columns=["predicted_body_mass_g"],
@@ -191,22 +146,6 @@ def test_pipeline_logistic_regression_fit_score_predict(

     # Check score to ensure the model was fitted
     score_result = pl.score(X_train, y_train).to_pandas()
-    # score_expected = pd.DataFrame(
-    #     {
-    #         "precision": [0.537091],
-    #         "recall": [0.538636],
-    #         "accuracy": [0.805389],
-    #         "f1_score": [0.537716],
-    #         "log_loss": [1.445433],
-    #         "roc_auc": [0.917818],
-    #     },
-    #     dtype="Float64",
-    # )
-    # score_expected = score_expected.reindex(index=score_expected.index.astype("Int64"))
-
-    # pd.testing.assert_frame_equal(
-    #     score_result, score_expected, check_exact=False, rtol=0.1
-    # )
     utils.check_pandas_df_schema_and_index(
         score_result, columns=utils.ML_CLASSFICATION_METRICS, index=1
     )
@@ -229,15 +168,6 @@ def test_pipeline_logistic_regression_fit_score_predict(
         ).set_index("tag_number")
     )
     predictions = pl.predict(new_penguins).to_pandas()
-    # expected = pd.DataFrame(
-    #     {"predicted_sex": ["MALE", "FEMALE", "FEMALE"]},
-    #     dtype=pd.StringDtype(storage="pyarrow"),
-    #     index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    # )
-    # pd.testing.assert_frame_equal(
-    #     predictions[["predicted_sex"]],
-    #     expected,
-    # )
     utils.check_pandas_df_schema_and_index(
         predictions,
         columns=["predicted_sex"],
diff --git a/tests/system/utils.py b/tests/system/utils.py
index 75897b7942..ab4c2c119f 100644
--- a/tests/system/utils.py
+++ b/tests/system/utils.py
@@ -267,21 +267,28 @@ def assert_pandas_df_equal_pca(actual, expected, **kwargs):


 def check_pandas_df_schema_and_index(
-    actual: pd.DataFrame,
+    pd_df: pd.DataFrame,
     columns: Iterable,
     index: Union[int, Iterable],
     col_exact: bool = True,
 ):
-    """Check pandas df schema and index. But not the values."""
+    """Check a pandas DataFrame's schema and index, but not its values.
+
+    Args:
+        pd_df: the pandas DataFrame to check
+        columns: the expected columns
+        index: int or Iterable. If int, check only the length (index size) of the DataFrame. If Iterable, check that the index values match.
+        col_exact: If True, require the DataFrame's columns to exactly match the columns param. Otherwise, only check that the DataFrame contains all of those columns.
+    """
     if col_exact:
-        assert list(actual.columns) == list(columns)
+        assert list(pd_df.columns) == list(columns)
     else:
-        assert set(columns) <= set(actual.columns)
+        assert set(columns) <= set(pd_df.columns)

     if isinstance(index, int):
-        assert len(actual) == index
+        assert len(pd_df) == index
     elif isinstance(index, Iterable):
-        assert list(actual.index) == list(index)
+        assert list(pd_df.index) == list(index)
     else:
         raise ValueError("Unsupported index type.")
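
A minimal usage sketch of the relaxed helper, using the same call shapes that
appear in the tests above; `model`, `X_train`, `y_train`, `new_penguins`, and
the penguin tag numbers are illustrative stand-ins for each test's fixtures:

    # Metrics: assert the exact metric columns and a single result row,
    # without pinning the model-dependent metric values.
    score_result = model.score(X_train, y_train).to_pandas()
    utils.check_pandas_df_schema_and_index(
        score_result, columns=utils.ML_REGRESSION_METRICS, index=1
    )

    # Predictions: assert a required subset of columns plus exact index values.
    predictions = model.predict(new_penguins).to_pandas()
    utils.check_pandas_df_schema_and_index(
        predictions,
        columns=["predicted_body_mass_g"],
        index=[1633, 1672, 1690],
        col_exact=False,
    )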