diff --git a/tests/system/large/ml/test_cluster.py b/tests/system/large/ml/test_cluster.py index b65b6e5083..454c94599d 100644 --- a/tests/system/large/ml/test_cluster.py +++ b/tests/system/large/ml/test_cluster.py @@ -13,13 +13,11 @@ # limitations under the License. import pandas as pd -import pytest from bigframes.ml import cluster -from tests.system.utils import assert_pandas_df_equal +from tests.system import utils -@pytest.mark.flaky(retries=2) def test_cluster_configure_fit_score_predict( session, penguins_df_default_index, dataset_id ): @@ -88,26 +86,18 @@ def test_cluster_configure_fit_score_predict( # Check score to ensure the model was fitted score_result = model.score(new_penguins).to_pandas() - score_expected = pd.DataFrame( - {"davies_bouldin_index": [1.502182], "mean_squared_distance": [1.953408]}, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 - ) + eval_metrics = ["davies_bouldin_index", "mean_squared_distance"] + utils.check_pandas_df_schema_and_index(score_result, columns=eval_metrics, index=1) predictions = model.predict(new_penguins).to_pandas() assert predictions.shape == (4, 9) - result = predictions[["CENTROID_ID"]] - expected = pd.DataFrame( - {"CENTROID_ID": [2, 3, 1, 2]}, - dtype="Int64", - index=pd.Index(["test1", "test2", "test3", "test4"], dtype="string[pyarrow]"), + utils.check_pandas_df_schema_and_index( + predictions, + columns=["CENTROID_ID"], + index=["test1", "test2", "test3", "test4"], + col_exact=False, ) - expected.index.name = "observation" - assert_pandas_df_equal(result, expected, ignore_order=True) # save, load, check n_clusters to ensure configuration was kept reloaded_model = model.to_gbq( diff --git a/tests/system/large/ml/test_compose.py b/tests/system/large/ml/test_compose.py index 38c5014c19..45322e78dd 100644 --- a/tests/system/large/ml/test_compose.py +++ 
b/tests/system/large/ml/test_compose.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pandas - from bigframes.ml import compose, preprocessing +from tests.system import utils def test_columntransformer_standalone_fit_and_transform( @@ -45,26 +44,18 @@ def test_columntransformer_standalone_fit_and_transform( ) result = transformer.transform(new_penguins_df).to_pandas() - expected = pandas.DataFrame( - { - "onehotencoded_species": [ - [{"index": 1, "value": 1.0}], - [{"index": 1, "value": 1.0}], - [{"index": 2, "value": 1.0}], - ], - "standard_scaled_culmen_length_mm": [ - -0.811119671289163, - -0.9945520581113803, - -1.104611490204711, - ], - "min_max_scaled_culmen_length_mm": [0.269, 0.232, 0.210], - "standard_scaled_flipper_length_mm": [-0.350044, -1.418336, -0.9198], - }, - index=pandas.Index([1633, 1672, 1690], dtype="Int64", name="tag_number"), + utils.check_pandas_df_schema_and_index( + result, + columns=[ + "onehotencoded_species", + "standard_scaled_culmen_length_mm", + "min_max_scaled_culmen_length_mm", + "standard_scaled_flipper_length_mm", + ], + index=[1633, 1672, 1690], + col_exact=False, ) - pandas.testing.assert_frame_equal(result, expected, rtol=0.1, check_dtype=False) - def test_columntransformer_standalone_fit_transform(new_penguins_df): transformer = compose.ColumnTransformer( @@ -86,25 +77,17 @@ def test_columntransformer_standalone_fit_transform(new_penguins_df): new_penguins_df[["species", "culmen_length_mm", "flipper_length_mm"]] ).to_pandas() - expected = pandas.DataFrame( - { - "onehotencoded_species": [ - [{"index": 1, "value": 1.0}], - [{"index": 1, "value": 1.0}], - [{"index": 2, "value": 1.0}], - ], - "standard_scaled_culmen_length_mm": [ - 1.313249, - -0.20198, - -1.111118, - ], - "standard_scaled_flipper_length_mm": [1.251098, -1.196588, -0.054338], - }, - index=pandas.Index([1633, 1672, 1690], dtype="Int64", name="tag_number"), + 
utils.check_pandas_df_schema_and_index( + result, + columns=[ + "onehotencoded_species", + "standard_scaled_culmen_length_mm", + "standard_scaled_flipper_length_mm", + ], + index=[1633, 1672, 1690], + col_exact=False, ) - pandas.testing.assert_frame_equal(result, expected, rtol=0.1, check_dtype=False) - def test_columntransformer_save_load(new_penguins_df, dataset_id): transformer = compose.ColumnTransformer( @@ -147,23 +130,13 @@ def test_columntransformer_save_load(new_penguins_df, dataset_id): new_penguins_df[["species", "culmen_length_mm", "flipper_length_mm"]] ).to_pandas() - # TODO(b/340888429): fix type error - expected = pandas.DataFrame( # type: ignore - { - "onehotencoded_species": [ - [{"index": 1, "value": 1.0}], - [{"index": 1, "value": 1.0}], - [{"index": 2, "value": 1.0}], - ], - "standard_scaled_culmen_length_mm": [ - 1.313249, - -0.20198, - -1.111118, - ], - "standard_scaled_flipper_length_mm": [1.251098, -1.196588, -0.054338], - }, - index=pandas.Index([1633, 1672, 1690], dtype="Int64", name="tag_number"), + utils.check_pandas_df_schema_and_index( + result, + columns=[ + "onehotencoded_species", + "standard_scaled_culmen_length_mm", + "standard_scaled_flipper_length_mm", + ], + index=[1633, 1672, 1690], + col_exact=False, ) - - # TODO(b/340888429): fix type error - pandas.testing.assert_frame_equal(result, expected, rtol=0.1, check_dtype=False) # type: ignore diff --git a/tests/system/large/ml/test_core.py b/tests/system/large/ml/test_core.py index aec1065e41..be5eea925f 100644 --- a/tests/system/large/ml/test_core.py +++ b/tests/system/large/ml/test_core.py @@ -12,14 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pandas -import pytest - from bigframes.ml import globals +from tests.system import utils -# TODO(garrettwu): Re-enable or not check exact numbers. 
-@pytest.mark.skip(reason="bqml regression") def test_bqml_e2e(session, dataset_id, penguins_df_default_index, new_penguins_df): df = penguins_df_default_index.dropna() X_train = df[ @@ -38,41 +34,33 @@ def test_bqml_e2e(session, dataset_id, penguins_df_default_index, new_penguins_d X_train, y_train, options={"model_type": "linear_reg"} ) + eval_metrics = [ + "mean_absolute_error", + "mean_squared_error", + "mean_squared_log_error", + "median_absolute_error", + "r2_score", + "explained_variance", + ] # no data - report evaluation from the automatic data split evaluate_result = model.evaluate().to_pandas() - evaluate_expected = pandas.DataFrame( - { - "mean_absolute_error": [225.817334], - "mean_squared_error": [80540.705944], - "mean_squared_log_error": [0.004972], - "median_absolute_error": [173.080816], - "r2_score": [0.87529], - "explained_variance": [0.87529], - }, - dtype="Float64", - ) - evaluate_expected = evaluate_expected.reindex( - index=evaluate_expected.index.astype("Int64") - ) - pandas.testing.assert_frame_equal( - evaluate_result, evaluate_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + evaluate_result, columns=eval_metrics, index=1 ) # evaluate on all training data evaluate_result = model.evaluate(df).to_pandas() - pandas.testing.assert_frame_equal( - evaluate_result, evaluate_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + evaluate_result, columns=eval_metrics, index=1 ) # predict new labels predictions = model.predict(new_penguins_df).to_pandas() - expected = pandas.DataFrame( - {"predicted_body_mass_g": [4030.1, 3280.8, 3177.9]}, - dtype="Float64", - index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pandas.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + index=[1633, 1672, 1690], + col_exact=False, 
) new_name = f"{dataset_id}.my_model" @@ -108,42 +96,34 @@ def test_bqml_manual_preprocessing_e2e( X_train, y_train, transforms=transforms, options=options ) + eval_metrics = [ + "mean_absolute_error", + "mean_squared_error", + "mean_squared_log_error", + "median_absolute_error", + "r2_score", + "explained_variance", + ] + # no data - report evaluation from the automatic data split evaluate_result = model.evaluate().to_pandas() - evaluate_expected = pandas.DataFrame( - { - "mean_absolute_error": [309.477334], - "mean_squared_error": [152184.227218], - "mean_squared_log_error": [0.009524], - "median_absolute_error": [257.727777], - "r2_score": [0.764356], - "explained_variance": [0.764356], - }, - dtype="Float64", - ) - evaluate_expected = evaluate_expected.reindex( - index=evaluate_expected.index.astype("Int64") - ) - - pandas.testing.assert_frame_equal( - evaluate_result, evaluate_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + evaluate_result, columns=eval_metrics, index=1 ) # evaluate on all training data evaluate_result = model.evaluate(df).to_pandas() - pandas.testing.assert_frame_equal( - evaluate_result, evaluate_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + evaluate_result, columns=eval_metrics, index=1 ) # predict new labels predictions = model.predict(new_penguins_df).to_pandas() - expected = pandas.DataFrame( - {"predicted_body_mass_g": [3968.8, 3176.3, 3545.2]}, - dtype="Float64", - index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pandas.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + index=[1633, 1672, 1690], + col_exact=False, ) new_name = f"{dataset_id}.my_model" @@ -168,24 +148,9 @@ def test_bqml_standalone_transform(penguins_df_default_index, new_penguins_df): ) transformed = 
model.transform(new_penguins_df).to_pandas() - expected = pandas.DataFrame( - { - "scaled_culmen_length_mm": [-0.8099, -0.9931, -1.103], - "onehotencoded_species": [ - [{"index": 1, "value": 1.0}], - [{"index": 1, "value": 1.0}], - [{"index": 2, "value": 1.0}], - ], - }, - index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - expected["scaled_culmen_length_mm"] = expected["scaled_culmen_length_mm"].astype( - "Float64" - ) - pandas.testing.assert_frame_equal( - transformed[["scaled_culmen_length_mm", "onehotencoded_species"]], - expected, - check_exact=False, - rtol=0.1, - check_dtype=False, + utils.check_pandas_df_schema_and_index( + transformed, + columns=["scaled_culmen_length_mm", "onehotencoded_species"], + index=[1633, 1672, 1690], + col_exact=False, ) diff --git a/tests/system/large/ml/test_decomposition.py b/tests/system/large/ml/test_decomposition.py index 9dab85023a..4312c24125 100644 --- a/tests/system/large/ml/test_decomposition.py +++ b/tests/system/large/ml/test_decomposition.py @@ -15,7 +15,7 @@ import pandas as pd from bigframes.ml import decomposition -import tests.system.utils +from tests.system import utils def test_decomposition_configure_fit_score_predict( @@ -45,34 +45,19 @@ def test_decomposition_configure_fit_score_predict( # Check score to ensure the model was fitted score_result = model.score(new_penguins).to_pandas() - score_expected = pd.DataFrame( - { - "total_explained_variance_ratio": [0.812383], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=["total_explained_variance_ratio"], index=1 ) result = model.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - { - "principal_component_1": [-1.459, 2.258, -1.685], - "principal_component_2": [-1.120, -1.351, -0.874], - 
"principal_component_3": [-0.646, 0.443, -0.704], - }, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - - tests.system.utils.assert_pandas_df_equal_pca( + utils.check_pandas_df_schema_and_index( result, - expected, - check_exact=False, - rtol=0.1, + columns=[ + "principal_component_1", + "principal_component_2", + "principal_component_3", + ], + index=[1633, 1672, 1690], ) # save, load, check n_components to ensure configuration was kept @@ -114,36 +99,21 @@ def test_decomposition_configure_fit_score_predict_params( # Check score to ensure the model was fitted score_result = model.score(new_penguins).to_pandas() - score_expected = pd.DataFrame( - { - "total_explained_variance_ratio": [0.932897], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=["total_explained_variance_ratio"], index=1 ) result = model.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - { - "principal_component_1": [-1.459, 2.258, -1.685], - "principal_component_2": [-1.120, -1.351, -0.874], - "principal_component_3": [-0.646, 0.443, -0.704], - "principal_component_4": [-0.539, 0.234, -0.571], - "principal_component_5": [-0.876, 0.122, 0.609], - }, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - - tests.system.utils.assert_pandas_df_equal_pca( + utils.check_pandas_df_schema_and_index( result, - expected, - check_exact=False, - rtol=0.1, + columns=[ + "principal_component_1", + "principal_component_2", + "principal_component_3", + "principal_component_4", + "principal_component_5", + ], + index=[1633, 1672, 1690], ) # save, load, check n_components to ensure configuration was kept diff --git a/tests/system/large/ml/test_ensemble.py 
b/tests/system/large/ml/test_ensemble.py index b9aae21956..b71d023302 100644 --- a/tests/system/large/ml/test_ensemble.py +++ b/tests/system/large/ml/test_ensemble.py @@ -12,16 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from unittest import TestCase - -import pandas import pytest import bigframes.ml.ensemble +from tests.system import utils -# TODO(garrettwu): Re-enable or not check exact numbers. -@pytest.mark.skip(reason="bqml regression") @pytest.mark.flaky(retries=2) def test_xgbregressor_default_params(penguins_df_default_index, dataset_id): model = bigframes.ml.ensemble.XGBRegressor() @@ -42,19 +38,9 @@ def test_xgbregressor_default_params(penguins_df_default_index, dataset_id): # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - expected = pandas.DataFrame( - { - "mean_absolute_error": [97.368139], - "mean_squared_error": [16284.877027], - "mean_squared_log_error": [0.0010189], - "median_absolute_error": [72.158691], - "r2_score": [0.974784], - "explained_variance": [0.974845], - }, - dtype="Float64", + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pandas.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -104,16 +90,9 @@ def test_xgbregressor_dart_booster_multiple_params( # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ - "mean_absolute_error", - "mean_squared_error", - "mean_squared_log_error", - "median_absolute_error", - "r2_score", - "explained_variance", - ]: - assert col_name in result.columns + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, 
index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -161,16 +140,9 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ - "precision", - "recall", - "accuracy", - "f1_score", - "log_loss", - "roc_auc", - ]: - assert col_name in result.columns + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -183,7 +155,7 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): ) -@pytest.mark.flaky(retries=2) +# @pytest.mark.flaky(retries=2) def test_xgbclassifier_dart_booster_multiple_params( penguins_df_default_index, dataset_id ): @@ -219,16 +191,9 @@ def test_xgbclassifier_dart_booster_multiple_params( # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ - "precision", - "recall", - "accuracy", - "f1_score", - "log_loss", - "roc_auc", - ]: - assert col_name in result.columns + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -277,16 +242,9 @@ def test_randomforestregressor_default_params(penguins_df_default_index, dataset # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ - "mean_absolute_error", - "mean_squared_error", - "mean_squared_log_error", - "median_absolute_error", - "r2_score", - "explained_variance", - ]: - assert col_name in result.columns + 
utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -331,16 +289,9 @@ def test_randomforestregressor_multiple_params(penguins_df_default_index, datase # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ - "mean_absolute_error", - "mean_squared_error", - "mean_squared_log_error", - "median_absolute_error", - "r2_score", - "explained_variance", - ]: - assert col_name in result.columns + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -385,16 +336,9 @@ def test_randomforestclassifier_default_params(penguins_df_default_index, datase # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ - "precision", - "recall", - "accuracy", - "f1_score", - "log_loss", - "roc_auc", - ]: - assert col_name in result.columns + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -439,16 +383,9 @@ def test_randomforestclassifier_multiple_params(penguins_df_default_index, datas # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - TestCase().assertSequenceEqual(result.shape, (1, 6)) - for col_name in [ - "precision", - "recall", - "accuracy", - "f1_score", - "log_loss", - "roc_auc", - ]: - assert col_name in result.columns + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 + ) # save, load, check parameters to ensure configuration was 
kept reloaded_model = model.to_gbq( diff --git a/tests/system/large/ml/test_forecasting.py b/tests/system/large/ml/test_forecasting.py index 1c0924245e..be7cf8c93d 100644 --- a/tests/system/large/ml/test_forecasting.py +++ b/tests/system/large/ml/test_forecasting.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pandas as pd import pytest from bigframes.ml import forecasting +from tests.system import utils ARIMA_EVALUATE_OUTPUT_COL = [ "non_seasonal_p", "non_seasonal_d", "non_seasonal_q", + "has_drift", "log_likelihood", "AIC", "variance", @@ -50,18 +51,17 @@ def test_arima_plus_model_fit_score( result = arima_model.score( new_time_series_df[["parsed_date"]], new_time_series_df[["total_visits"]] ).to_pandas() - expected = pd.DataFrame( - { - "mean_absolute_error": [154.742547], - "mean_squared_error": [26844.868855], - "root_mean_squared_error": [163.844038], - "mean_absolute_percentage_error": [6.189702], - "symmetric_mean_absolute_percentage_error": [6.097155], - }, - dtype="Float64", + utils.check_pandas_df_schema_and_index( + result, + columns=[ + "mean_absolute_error", + "mean_squared_error", + "root_mean_squared_error", + "mean_absolute_percentage_error", + "symmetric_mean_absolute_percentage_error", + ], + index=1, ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load to ensure configuration was kept reloaded_model = arima_model.to_gbq( @@ -73,10 +73,10 @@ def test_arima_plus_model_fit_score( def test_arima_plus_model_fit_summary(dataset_id, arima_model): - - result = arima_model.summary() - assert result.shape == (1, 12) - assert all(column in result.columns for column in ARIMA_EVALUATE_OUTPUT_COL) + result = arima_model.summary().to_pandas() + utils.check_pandas_df_schema_and_index( + result, columns=ARIMA_EVALUATE_OUTPUT_COL, index=1 + ) # save, load to ensure 
configuration was kept reloaded_model = arima_model.to_gbq( @@ -88,13 +88,13 @@ def test_arima_plus_model_fit_summary(dataset_id, arima_model): def test_arima_coefficients(arima_model): - got = arima_model.coef_ - expected_columns = { + result = arima_model.coef_.to_pandas() + expected_columns = [ "ar_coefficients", "ma_coefficients", "intercept_or_drift", - } - assert set(got.columns) == expected_columns + ] + utils.check_pandas_df_schema_and_index(result, columns=expected_columns, index=1) def test_arima_plus_model_fit_params(time_series_df_default_index, dataset_id): diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py index 50e3de3fc4..f1f7985278 100644 --- a/tests/system/large/ml/test_linear_model.py +++ b/tests/system/large/ml/test_linear_model.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pandas as pd - import bigframes.ml.linear_model +from tests.system import utils def test_linear_regression_configure_fit_score(penguins_df_default_index, dataset_id): @@ -36,19 +35,9 @@ def test_linear_regression_configure_fit_score(penguins_df_default_index, datase # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - expected = pd.DataFrame( - { - "mean_absolute_error": [225.735767], - "mean_squared_error": [80417.461828], - "mean_squared_log_error": [0.004967], - "median_absolute_error": [172.543702], - "r2_score": [0.87548], - "explained_variance": [0.87548], - }, - dtype="Float64", + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq(f"{dataset_id}.temp_configured_model", replace=True) @@ -99,19 +88,9 @@ def 
test_linear_regression_customized_params_fit_score( # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - expected = pd.DataFrame( - { - "mean_absolute_error": [240], - "mean_squared_error": [91197], - "mean_squared_log_error": [0.00573], - "median_absolute_error": [197], - "r2_score": [0.858], - "explained_variance": [0.8588], - }, - dtype="Float64", + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_REGRESSION_METRICS, index=1 ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq(f"{dataset_id}.temp_configured_model", replace=True) @@ -154,19 +133,9 @@ def test_logistic_regression_configure_fit_score(penguins_df_default_index, data # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - expected = pd.DataFrame( - { - "precision": [0.616753], - "recall": [0.618615], - "accuracy": [0.92515], - "f1_score": [0.617681], - "log_loss": [1.498832], - "roc_auc": [0.975807], - }, - dtype="Float64", + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, index=1 ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( @@ -210,19 +179,9 @@ def test_logistic_regression_customized_params_fit_score( # Check score to ensure the model was fitted result = model.score(X_train, y_train).to_pandas() - expected = pd.DataFrame( - { - "precision": [0.487], - "recall": [0.602], - "accuracy": [0.464], - "f1_score": [0.379], - "log_loss": [0.972], - "roc_auc": [0.700], - }, - dtype="Float64", + utils.check_pandas_df_schema_and_index( + result, columns=utils.ML_CLASSFICATION_METRICS, 
index=1 ) - expected = expected.reindex(index=expected.index.astype("Int64")) - pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=0.1) # save, load, check parameters to ensure configuration was kept reloaded_model = model.to_gbq( diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py index 1a92d0f7d4..3d7eb2e426 100644 --- a/tests/system/large/ml/test_pipeline.py +++ b/tests/system/large/ml/test_pipeline.py @@ -24,7 +24,7 @@ pipeline, preprocessing, ) -from tests.system.utils import assert_pandas_df_equal, assert_pandas_df_equal_pca +from tests.system import utils def test_pipeline_linear_regression_fit_score_predict( @@ -51,21 +51,8 @@ def test_pipeline_linear_regression_fit_score_predict( # Check score to ensure the model was fitted score_result = pl.score(X_train, y_train).to_pandas() - score_expected = pd.DataFrame( - { - "mean_absolute_error": [309.477331], - "mean_squared_error": [152184.227219], - "mean_squared_log_error": [0.009524], - "median_absolute_error": [257.728263], - "r2_score": [0.764356], - "explained_variance": [0.764356], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=utils.ML_REGRESSION_METRICS, index=1 ) # predict new labels @@ -87,13 +74,11 @@ def test_pipeline_linear_regression_fit_score_predict( ).set_index("tag_number") ) predictions = pl.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - {"predicted_body_mass_g": [3968.8, 3176.3, 3545.2]}, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + 
index=[1633, 1672, 1690], + col_exact=False, ) @@ -115,21 +100,8 @@ def test_pipeline_linear_regression_series_fit_score_predict( # Check score to ensure the model was fitted score_result = pl.score(X_train, y_train).to_pandas() - score_expected = pd.DataFrame( - { - "mean_absolute_error": [528.495599], - "mean_squared_error": [421722.261808], - "mean_squared_log_error": [0.022963], - "median_absolute_error": [468.895249], - "r2_score": [0.346999], - "explained_variance": [0.346999], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=utils.ML_REGRESSION_METRICS, index=1 ) # predict new labels @@ -142,13 +114,11 @@ def test_pipeline_linear_regression_series_fit_score_predict( ).set_index("tag_number") ) predictions = pl.predict(new_penguins["culmen_length_mm"]).to_pandas() - expected = pd.DataFrame( - {"predicted_body_mass_g": [3818.845703, 3732.022253, 3679.928123]}, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + index=[1633, 1672, 1690], + col_exact=False, ) @@ -176,21 +146,8 @@ def test_pipeline_logistic_regression_fit_score_predict( # Check score to ensure the model was fitted score_result = pl.score(X_train, y_train).to_pandas() - score_expected = pd.DataFrame( - { - "precision": [0.537091], - "recall": [0.538636], - "accuracy": [0.805389], - "f1_score": [0.537716], - "log_loss": [1.445433], - "roc_auc": [0.917818], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, 
score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=utils.ML_CLASSFICATION_METRICS, index=1 ) # predict new labels @@ -211,19 +168,14 @@ def test_pipeline_logistic_regression_fit_score_predict( ).set_index("tag_number") ) predictions = pl.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - {"predicted_sex": ["MALE", "FEMALE", "FEMALE"]}, - dtype=pd.StringDtype(storage="pyarrow"), - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_sex"]], - expected, + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_sex"], + index=[1633, 1672, 1690], + col_exact=False, ) -# TODO(garrettwu): Re-enable or not check exact numbers. -@pytest.mark.skip(reason="bqml regression") @pytest.mark.flaky(retries=2) def test_pipeline_xgbregressor_fit_score_predict(session, penguins_df_default_index): """Test a supervised model with a minimal preprocessing step""" @@ -247,21 +199,8 @@ def test_pipeline_xgbregressor_fit_score_predict(session, penguins_df_default_in # Check score to ensure the model was fitted score_result = pl.score(X_train, y_train).to_pandas() - score_expected = pd.DataFrame( - { - "mean_absolute_error": [202.298434], - "mean_squared_error": [74515.108971], - "mean_squared_log_error": [0.004365], - "median_absolute_error": [142.949219], - "r2_score": [0.88462], - "explained_variance": [0.886454], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=utils.ML_REGRESSION_METRICS, index=1 ) # predict new labels @@ -283,24 +222,14 @@ def test_pipeline_xgbregressor_fit_score_predict(session, penguins_df_default_in ).set_index("tag_number") ) predictions = 
pl.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - { - "predicted_body_mass_g": [ - 4287.34521484375, - 3198.351806640625, - 3385.34130859375, - ] - }, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_body_mass_g"]], expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_body_mass_g"], + index=[1633, 1672, 1690], + col_exact=False, ) -# TODO(garrettwu): Re-enable or not check exact numbers. -@pytest.mark.skip(reason="bqml regression") @pytest.mark.flaky(retries=2) def test_pipeline_random_forest_classifier_fit_score_predict( session, penguins_df_default_index @@ -326,21 +255,8 @@ def test_pipeline_random_forest_classifier_fit_score_predict( # Check score to ensure the model was fitted score_result = pl.score(X_train, y_train).to_pandas() - score_expected = pd.DataFrame( - { - "precision": [0.585505], - "recall": [0.58676], - "accuracy": [0.877246], - "f1_score": [0.585657], - "log_loss": [0.880643], - "roc_auc": [0.970697], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=utils.ML_CLASSFICATION_METRICS, index=1 ) # predict new labels @@ -361,14 +277,11 @@ def test_pipeline_random_forest_classifier_fit_score_predict( ).set_index("tag_number") ) predictions = pl.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - {"predicted_sex": ["MALE", "FEMALE", "FEMALE"]}, - dtype=pd.StringDtype(storage="pyarrow"), - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - pd.testing.assert_frame_equal( - predictions[["predicted_sex"]], - expected, + utils.check_pandas_df_schema_and_index( + predictions, + columns=["predicted_sex"], + index=[1633, 
1672, 1690], + col_exact=False, ) @@ -412,40 +325,20 @@ def test_pipeline_PCA_fit_score_predict(session, penguins_df_default_index): # Check score to ensure the model was fitted score_result = pl.score(new_penguins).to_pandas() - score_expected = pd.DataFrame( - { - "total_explained_variance_ratio": [1.0], - }, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 + utils.check_pandas_df_schema_and_index( + score_result, columns=["total_explained_variance_ratio"], index=1 ) predictions = pl.predict(new_penguins).to_pandas() - expected = pd.DataFrame( - { - "principal_component_1": [-1.115259, -1.506141, -1.471173], - "principal_component_2": [-0.074825, 0.69664, 0.406103], - "principal_component_3": [0.500013, -0.544479, 0.075849], - }, - dtype="Float64", - index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), - ) - - assert_pandas_df_equal_pca( - predictions[ - [ - "principal_component_1", - "principal_component_2", - "principal_component_3", - ] + utils.check_pandas_df_schema_and_index( + predictions, + columns=[ + "principal_component_1", + "principal_component_2", + "principal_component_3", ], - expected, - check_exact=False, - rtol=0.1, + index=[1633, 1672, 1690], + col_exact=False, ) @@ -538,29 +431,16 @@ def test_pipeline_standard_scaler_kmeans_fit_score_predict( # Check score to ensure the model was fitted score_result = pl.score(new_penguins).to_pandas() - score_expected = pd.DataFrame( - {"davies_bouldin_index": [7.542981], "mean_squared_distance": [94.692409]}, - dtype="Float64", - ) - score_expected = score_expected.reindex(index=score_expected.index.astype("Int64")) - - pd.testing.assert_frame_equal( - score_result, score_expected, check_exact=False, rtol=0.1 - ) + eval_metrics = ["davies_bouldin_index", "mean_squared_distance"] + utils.check_pandas_df_schema_and_index(score_result, 
# Column names expected in the score output of regression models.
ML_REGRESSION_METRICS = [
    "mean_absolute_error",
    "mean_squared_error",
    "mean_squared_log_error",
    "median_absolute_error",
    "r2_score",
    "explained_variance",
]
# Column names expected in the score output of classification models.
# NOTE: the identifier is misspelled ("CLASSFICATION"); it is kept as-is
# because the ML system tests reference it under this exact name.
ML_CLASSFICATION_METRICS = [
    "precision",
    "recall",
    "accuracy",
    "f1_score",
    "log_loss",
    "roc_auc",
]


def check_pandas_df_schema_and_index(
    pd_df: pd.DataFrame,
    columns: Iterable,
    index: Union[int, Iterable],
    col_exact: bool = True,
):
    """Check a pandas DataFrame's columns and index, but not its values.

    Every assertion carries an explicit message. This helper lives in a
    support module rather than a test module, where pytest's assertion
    rewriting does not apply by default, so a bare ``assert`` would fail
    with an empty ``AssertionError``.

    Args:
        pd_df: The pandas DataFrame to inspect.
        columns: The column labels to check against.
        index: If an int, only the number of rows is checked. If an
            iterable, the index values must match element by element, in
            order.
        col_exact: If True, the DataFrame's columns must equal ``columns``
            exactly (same labels, same order). Otherwise the DataFrame only
            has to contain every label in ``columns``.

    Raises:
        AssertionError: If the columns, the row count, or the index values
            do not match.
        ValueError: If ``index`` is neither an int nor an iterable.
    """
    # Materialise once so a one-shot iterable can be both compared and
    # reported in the failure message.
    expected_columns = list(columns)
    actual_columns = list(pd_df.columns)

    if col_exact:
        assert actual_columns == expected_columns, (
            f"Column mismatch: expected {expected_columns!r}, "
            f"got {actual_columns!r}."
        )
    else:
        present = set(actual_columns)
        missing = [col for col in expected_columns if col not in present]
        assert not missing, (
            f"DataFrame is missing expected columns {missing!r}; "
            f"available columns are {actual_columns!r}."
        )

    if isinstance(index, int):
        actual_rows = len(pd_df)
        assert actual_rows == index, (
            f"Expected {index} rows but DataFrame has {actual_rows}."
        )
    elif isinstance(index, Iterable):
        expected_index = list(index)
        actual_index = list(pd_df.index)
        assert actual_index == expected_index, (
            f"Index mismatch: expected {expected_index!r}, "
            f"got {actual_index!r}."
        )
    else:
        raise ValueError("Unsupported index type.")