From ef4edde2539445884297c0b7c9a0bf1b8f2100b4 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 11 Jan 2022 19:06:02 +0100 Subject: [PATCH 1/5] bumpversion 0.9.0 --- imblearn/_version.py | 2 +- setup.cfg | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/imblearn/_version.py b/imblearn/_version.py index c97eb6b29..b25b802af 100644 --- a/imblearn/_version.py +++ b/imblearn/_version.py @@ -22,4 +22,4 @@ # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.9.0.dev0" +__version__ = "0.9.0" diff --git a/setup.cfg b/setup.cfg index b9eae29e4..3f38ce854 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,14 +1,14 @@ [bumpversion] -current_version = 0.9.0.dev0 +current_version = 0.9.0 tag = False parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<dev>\d+))? -serialize = +serialize = {major}.{minor}.{patch}.{release}{dev} {major}.{minor}.{patch} [bumpversion:part:release] optional_value = gamma -values = +values = dev gamma @@ -22,19 +22,20 @@ test = pytest [tool:pytest] doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS testpaths = imblearn -addopts = +addopts = --doctest-modules --color=yes -rs -filterwarnings = +filterwarnings = ignore:the matrix subclass:PendingDeprecationWarning [flake8] max-line-length = 88 ignore = E121,E123,E126,E226,E24,E704,W503,W504,E203 -per-file-ignores = +per-file-ignores = examples/*: E402 [mypy] ignore_missing_imports = True allow_redefinition = True + From 5693cbafb7fe7b37e3c6f1eebbcee9d646da441e Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 16 May 2022 16:25:42 +0200 Subject: [PATCH 2/5] MNT adapt for scikit-learn 1.1 (#902) --- .pre-commit-config.yaml | 2 +- README.rst | 12 +- azure-pipelines.yml | 65 +++++---- imblearn/_min_dependencies.py | 23 +-- imblearn/datasets/tests/test_imbalance.py | 13 +- imblearn/ensemble/_forest.py | 138 +++++++++++------- .../ensemble/tests/test_weight_boosting.py | 14 +- imblearn/metrics/tests/test_classification.py | 2 +- 
imblearn/over_sampling/_smote/cluster.py | 2 +- imblearn/pipeline.py | 4 +- imblearn/tests/test_pipeline.py | 13 +- imblearn/utils/estimator_checks.py | 16 +- pyproject.toml | 4 +- 13 files changed, 176 insertions(+), 132 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index db2c5084c..6519a8498 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black - rev: 21.6b0 + rev: 22.3.0 hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 diff --git a/README.rst b/README.rst index 81f580e69..2cb78ec4a 100644 --- a/README.rst +++ b/README.rst @@ -27,12 +27,12 @@ .. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg .. _Black: :target: https://github.com/psf/black -.. |PythonMinVersion| replace:: 3.7 -.. |NumPyMinVersion| replace:: 1.14.6 -.. |SciPyMinVersion| replace:: 1.1.0 -.. |ScikitLearnMinVersion| replace:: 1.0.1 -.. |MatplotlibMinVersion| replace:: 2.2.3 -.. |PandasMinVersion| replace:: 0.25.0 +.. |PythonMinVersion| replace:: 3.8 +.. |NumPyMinVersion| replace:: 1.17.3 +.. |SciPyMinVersion| replace:: 1.3.2 +.. |ScikitLearnMinVersion| replace:: 1.1.0 +.. |MatplotlibMinVersion| replace:: 3.1.2 +.. |PandasMinVersion| replace:: 1.0.5 .. |TensorflowMinVersion| replace:: 2.4.3 .. |KerasMinVersion| replace:: 2.4.3 .. |SeabornMinVersion| replace:: 0.9.0 diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d614ff1b3..d9a5b1f9b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -45,7 +45,7 @@ jobs: versionSpec: '3.9' - bash: | # Include pytest compatibility with mypy - pip install pytest flake8 mypy==0.782 black==21.6b0 + pip install pytest flake8 mypy==0.782 black==22.3 displayName: Install linters - bash: | black --check --diff . 
@@ -115,7 +115,7 @@ jobs: py37_conda_forge_openblas_ubuntu_1804: DISTRIB: 'conda' CONDA_CHANNEL: 'conda-forge' - PYTHON_VERSION: '3.7' + PYTHON_VERSION: '3.8' BLAS: 'openblas' COVERAGE: 'false' @@ -140,13 +140,13 @@ jobs: PANDAS_VERSION: 'none' THREADPOOLCTL_VERSION: 'min' COVERAGE: 'false' - # Linux + Python 3.7 build with OpenBLAS and without SITE_JOBLIB + # Linux + Python 3.8 build with OpenBLAS and without SITE_JOBLIB py37_conda_defaults_openblas: DISTRIB: 'conda' - CONDA_CHANNEL: 'defaults' # Anaconda main channel - PYTHON_VERSION: '3.7' + CONDA_CHANNEL: 'conda-forge' + PYTHON_VERSION: '3.8' BLAS: 'openblas' - NUMPY_VERSION: '1.16.6' # we cannot get an older version of the dependencies resolution + NUMPY_VERSION: '1.19.5' # we cannot get an older version of the dependencies resolution SCIPY_VERSION: 'min' SKLEARN_VERSION: 'min' MATPLOTLIB_VERSION: 'none' @@ -177,7 +177,7 @@ jobs: conda_tensorflow_minimum: DISTRIB: 'conda-minimum-tensorflow' CONDA_CHANNEL: 'conda-forge' - PYTHON_VERSION: '3.7' + PYTHON_VERSION: '3.8' SKLEARN_VERSION: 'min' TENSORFLOW_VERSION: 'min' TEST_DOCS: 'true' @@ -201,33 +201,34 @@ jobs: conda_keras_minimum: DISTRIB: 'conda-minimum-keras' CONDA_CHANNEL: 'conda-forge' - PYTHON_VERSION: '3.7' + PYTHON_VERSION: '3.8' SKLEARN_VERSION: 'min' KERAS_VERSION: 'min' TEST_DOCS: 'true' TEST_DOCSTRINGS: 'false' # it is going to fail because of scikit-learn inheritance CHECK_WARNINGS: 'true' -- template: build_tools/azure/posix-docker.yml - parameters: - name: Linux_Docker - vmImage: ubuntu-20.04 - dependsOn: [linting, git_commit] - condition: | - and( - succeeded(), - not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), - ne(variables['Build.Reason'], 'Schedule') - ) - matrix: - debian_atlas_32bit: - DISTRIB: 'debian-32' - DOCKER_CONTAINER: 'i386/debian:10.9' - JOBLIB_VERSION: 'min' - # disable pytest xdist due to unknown bug with 32-bit container - PYTEST_XDIST_VERSION: 'none' - PYTEST_VERSION: 'min' - 
THREADPOOLCTL_VERSION: '2.2.0' +# Currently runs on Python 3.8 while only Python 3.7 available +# - template: build_tools/azure/posix-docker.yml +# parameters: +# name: Linux_Docker +# vmImage: ubuntu-20.04 +# dependsOn: [linting, git_commit] +# condition: | +# and( +# succeeded(), +# not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), +# ne(variables['Build.Reason'], 'Schedule') +# ) +# matrix: +# debian_atlas_32bit: +# DISTRIB: 'debian-32' +# DOCKER_CONTAINER: 'i386/debian:10.9' +# JOBLIB_VERSION: 'min' +# # disable pytest xdist due to unknown bug with 32-bit container +# PYTEST_XDIST_VERSION: 'none' +# PYTEST_VERSION: 'min' +# THREADPOOLCTL_VERSION: '2.2.0' - template: build_tools/azure/posix.yml parameters: @@ -266,14 +267,14 @@ jobs: ne(variables['Build.Reason'], 'Schedule') ) matrix: - py37_conda_forge_mkl: + 8_conda_forge_mkl: DISTRIB: 'conda' CONDA_CHANNEL: 'conda-forge' - PYTHON_VERSION: '3.7' + PYTHON_VERSION: '3.8' CHECK_WARNINGS: 'true' PYTHON_ARCH: '64' PYTEST_VERSION: '*' COVERAGE: 'true' - py37_pip_openblas_32bit: - PYTHON_VERSION: '3.7' + py38_pip_openblas_32bit: + PYTHON_VERSION: '3.8' PYTHON_ARCH: '32' diff --git a/imblearn/_min_dependencies.py b/imblearn/_min_dependencies.py index 736b392c5..a9e0d41eb 100644 --- a/imblearn/_min_dependencies.py +++ b/imblearn/_min_dependencies.py @@ -1,25 +1,16 @@ """All minimum dependencies for imbalanced-learn.""" import argparse -# numpy scipy and cython should by in sync with pyproject.toml -# We pinned PyWavelet (a scikit-image dependence) to 1.1.1 in the minimum -# documentation CI builds that is the latest version that support our -# minimum NumPy version required. If PyWavelets 1.2+ is installed, it would -# require NumPy 1.17+ that trigger a bug with Pandas 0.25: -# https://github.com/numpy/numpy/issues/18355#issuecomment-774610226 -# When upgrading NumPy, we can unpin PyWavelets but we need to update the -# minimum version of Pandas >= 1.0.5. 
-NUMPY_MIN_VERSION = "1.14.6" -SCIPY_MIN_VERSION = "1.1.0" -PANDAS_MIN_VERSION = "0.25.0" -SKLEARN_MIN_VERSION = "1.0.1" +NUMPY_MIN_VERSION = "1.17.3" +SCIPY_MIN_VERSION = "1.3.2" +PANDAS_MIN_VERSION = "1.0.5" +SKLEARN_MIN_VERSION = "1.1.0" TENSORFLOW_MIN_VERSION = "2.4.3" KERAS_MIN_VERSION = "2.4.3" -JOBLIB_MIN_VERSION = "0.11" +JOBLIB_MIN_VERSION = "1.0.0" THREADPOOLCTL_MIN_VERSION = "2.0.0" PYTEST_MIN_VERSION = "5.0.1" - # 'build' and 'install' is included to have structured metadata for CI. # It will NOT be included in setup's extras_require # The values are (version_spec, comma separated tags) @@ -32,13 +23,13 @@ "pandas": (PANDAS_MIN_VERSION, "optional, docs, examples, tests"), "tensorflow": (TENSORFLOW_MIN_VERSION, "optional, docs, examples, tests"), "keras": (KERAS_MIN_VERSION, "optional, docs, examples, tests"), - "matplotlib": ("2.2.3", "docs, examples"), + "matplotlib": ("3.1.2", "docs, examples"), "seaborn": ("0.9.0", "docs, examples"), "memory_profiler": ("0.57.0", "docs"), "pytest": (PYTEST_MIN_VERSION, "tests"), "pytest-cov": ("2.9.0", "tests"), "flake8": ("3.8.2", "tests"), - "black": ("21.6b0", "tests"), + "black": ("22.3.0", "tests"), "mypy": ("0.770", "tests"), "sphinx": ("4.2.0", "docs"), "sphinx-gallery": ("0.7.0", "docs"), diff --git a/imblearn/datasets/tests/test_imbalance.py b/imblearn/datasets/tests/test_imbalance.py index a4a36d8f0..4cf2eedd2 100644 --- a/imblearn/datasets/tests/test_imbalance.py +++ b/imblearn/datasets/tests/test_imbalance.py @@ -9,7 +9,6 @@ import numpy as np from sklearn.datasets import load_iris -from sklearn.datasets import fetch_openml from imblearn.datasets import make_imbalance @@ -60,18 +59,20 @@ def test_make_imbalance_dict(iris, sampling_strategy, expected_counts): "sampling_strategy, expected_counts", [ ( - {"Iris-setosa": 10, "Iris-versicolor": 20, "Iris-virginica": 30}, - {"Iris-setosa": 10, "Iris-versicolor": 20, "Iris-virginica": 30}, + {"setosa": 10, "versicolor": 20, "virginica": 30}, + {"setosa": 10, 
"versicolor": 20, "virginica": 30}, ), ( - {"Iris-setosa": 10, "Iris-versicolor": 20}, - {"Iris-setosa": 10, "Iris-versicolor": 20, "Iris-virginica": 50}, + {"setosa": 10, "versicolor": 20}, + {"setosa": 10, "versicolor": 20, "virginica": 50}, ), ], ) def test_make_imbalanced_iris(as_frame, sampling_strategy, expected_counts): pytest.importorskip("pandas") - X, y = fetch_openml("iris", version=1, return_X_y=True, as_frame=as_frame) + iris = load_iris(as_frame=True) + X, y = iris.data, iris.target + y = iris.target_names[iris.target] X_res, y_res = make_imbalance(X, y, sampling_strategy=sampling_strategy) if as_frame: assert hasattr(X_res, "loc") diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py index 876f0558d..77c2de908 100644 --- a/imblearn/ensemble/_forest.py +++ b/imblearn/ensemble/_forest.py @@ -14,7 +14,7 @@ from joblib import Parallel, delayed -from sklearn.base import clone +from sklearn.base import clone, is_classifier from sklearn.ensemble import RandomForestClassifier from sklearn.ensemble._base import _set_random_states from sklearn.ensemble._forest import _get_n_samples_bootstrap @@ -22,10 +22,9 @@ from sklearn.ensemble._forest import _generate_unsampled_indices from sklearn.exceptions import DataConversionWarning from sklearn.tree import DecisionTreeClassifier -from sklearn.utils import check_array from sklearn.utils import check_random_state from sklearn.utils import _safe_indexing -from sklearn.utils.fixes import _joblib_parallel_args +from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import _check_sample_weight from ..pipeline import make_pipeline @@ -43,7 +42,7 @@ def _local_parallel_build_trees( sampler, tree, - forest, + bootstrap, X, y, sample_weight, @@ -61,7 +60,7 @@ def _local_parallel_build_trees( n_samples_bootstrap = min(n_samples_bootstrap, X_resampled.shape[0]) tree = _parallel_build_trees( tree, - forest, + bootstrap, X_resampled, y_resampled, sample_weight, @@ -126,7 +125,7 @@ 
class BalancedRandomForestClassifier(RandomForestClassifier): equal weight when sample_weight is not provided. max_features : {{"auto", "sqrt", "log2"}}, int, float, or None, \ - default="auto" + default="sqrt" The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. @@ -334,7 +333,7 @@ def __init__( min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, - max_features="auto", + max_features="sqrt", max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, @@ -548,12 +547,12 @@ def fit(self, X, y, sample_weight=None): samplers_trees = Parallel( n_jobs=self.n_jobs, verbose=self.verbose, - **_joblib_parallel_args(prefer="threads"), + prefer="threads", )( delayed(_local_parallel_build_trees)( s, t, - self, + self.bootstrap, X, y_encoded, sample_weight, @@ -580,7 +579,19 @@ def fit(self, X, y, sample_weight=None): ) if self.oob_score: - self._set_oob_score(X, y_encoded) + y_type = type_of_target(y) + if y_type in ("multiclass-multioutput", "unknown"): + # FIXME: we could consider to support multiclass-multioutput if + # we introduce or reuse a constructor parameter (e.g. + # oob_score) allowing our user to pass a callable defining the + # scoring strategy on OOB sample. + raise ValueError( + "The type of target cannot be used to compute OOB " + f"estimates. Got {y_type} while only the following are " + "supported: continuous, continuous-multioutput, binary, " + "multiclass, multilabel-indicator." + ) + self._set_oob_score_and_attributes(X, y_encoded) # Decapsulate classes_ attributes if hasattr(self, "classes_") and self.n_outputs_ == 1: @@ -589,18 +600,62 @@ def fit(self, X, y, sample_weight=None): return self - def _set_oob_score(self, X, y): - """Compute out-of-bag score.""" - X = check_array(X, dtype=DTYPE, accept_sparse="csr") + def _set_oob_score_and_attributes(self, X, y): + """Compute and set the OOB score and attributes. 
+ + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The data matrix. + y : ndarray of shape (n_samples, n_outputs) + The target matrix. + """ + self.oob_decision_function_ = self._compute_oob_predictions(X, y) + if self.oob_decision_function_.shape[-1] == 1: + # drop the n_outputs axis if there is a single output + self.oob_decision_function_ = self.oob_decision_function_.squeeze(axis=-1) + from sklearn.metrics import accuracy_score + + self.oob_score_ = accuracy_score( + y, np.argmax(self.oob_decision_function_, axis=1) + ) + + def _compute_oob_predictions(self, X, y): + """Compute and set the OOB score. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The data matrix. + y : ndarray of shape (n_samples, n_outputs) + The target matrix. + + Returns + ------- + oob_pred : ndarray of shape (n_samples, n_classes, n_outputs) or \ + (n_samples, 1, n_outputs) + The OOB predictions. + """ + # Prediction requires X to be in CSR format + if issparse(X): + X = X.tocsr() - n_classes_ = self.n_classes_ n_samples = y.shape[0] + n_outputs = self.n_outputs_ - oob_decision_function = [] - oob_score = 0.0 - predictions = [ - np.zeros((n_samples, n_classes_[k])) for k in range(self.n_outputs_) - ] + if is_classifier(self) and hasattr(self, "n_classes_"): + # n_classes_ is a ndarray at this stage + # all the supported type of target will have the same number of + # classes in all outputs + oob_pred_shape = (n_samples, self.n_classes_[0], n_outputs) + else: + # for regression, n_classes_ does not exist and we create an empty + # axis to be consistent with the classification case and make + # the array operations compatible with the 2 settings + oob_pred_shape = (n_samples, 1, n_outputs) + + oob_pred = np.zeros(shape=oob_pred_shape, dtype=np.float64) + n_oob_pred = np.zeros((n_samples, n_outputs), dtype=np.int64) for sampler, estimator in zip(self.samplers_, self.estimators_): X_resample = X[sampler.sample_indices_] @@ -614,42 
+669,27 @@ def _set_oob_score(self, X, y): unsampled_indices = _generate_unsampled_indices( estimator.random_state, n_sample_subset, n_samples_bootstrap ) - p_estimator = estimator.predict_proba( - X_resample[unsampled_indices, :], check_input=False - ) - if self.n_outputs_ == 1: - p_estimator = [p_estimator] + y_pred = self._get_oob_predictions( + estimator, X_resample[unsampled_indices, :] + ) - for k in range(self.n_outputs_): - indices = sampler.sample_indices_[unsampled_indices] - predictions[k][indices, :] += p_estimator[k] + indices = sampler.sample_indices_[unsampled_indices] + oob_pred[indices, ...] += y_pred + n_oob_pred[indices, :] += 1 - for k in range(self.n_outputs_): - if (predictions[k].sum(axis=1) == 0).any(): + for k in range(n_outputs): + if (n_oob_pred == 0).any(): warn( - "Some inputs do not have OOB scores. " - "This probably means too few trees were used " - "to compute any reliable oob estimates." + "Some inputs do not have OOB scores. This probably means " + "too few trees were used to compute any reliable OOB " + "estimates.", + UserWarning, ) + n_oob_pred[n_oob_pred == 0] = 1 + oob_pred[..., k] /= n_oob_pred[..., [k]] - with np.errstate(invalid="ignore", divide="ignore"): - # with the resampling, we are likely to have rows not included - # for the OOB score leading to division by zero - decision = predictions[k] / predictions[k].sum(axis=1)[:, np.newaxis] - mask_scores = np.isnan(np.sum(decision, axis=1)) - oob_decision_function.append(decision) - oob_score += np.mean( - y[~mask_scores, k] == np.argmax(predictions[k][~mask_scores], axis=1), - axis=0, - ) - - if self.n_outputs_ == 1: - self.oob_decision_function_ = oob_decision_function[0] - else: - self.oob_decision_function_ = oob_decision_function - - self.oob_score_ = oob_score / self.n_outputs_ + return oob_pred @property def n_features_(self): diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py index 77abdc652..e1394a2b5 
100644 --- a/imblearn/ensemble/tests/test_weight_boosting.py +++ b/imblearn/ensemble/tests/test_weight_boosting.py @@ -26,15 +26,19 @@ def imbalanced_dataset(): @pytest.mark.parametrize( - "boosting_params, err_msg", + "boosting_params, err_type, err_msg", [ - ({"n_estimators": "whatever"}, "n_estimators must be an integer"), - ({"n_estimators": -100}, "n_estimators must be greater than zero"), + ( + {"n_estimators": "whatever"}, + TypeError, + "n_estimators must be an instance of int, not str.", + ), + ({"n_estimators": -100}, ValueError, "n_estimators == -100, must be >= 1."), ], ) -def test_rusboost_error(imbalanced_dataset, boosting_params, err_msg): +def test_rusboost_error(imbalanced_dataset, boosting_params, err_type, err_msg): rusboost = RUSBoostClassifier(**boosting_params) - with pytest.raises(ValueError, match=err_msg): + with pytest.raises(err_type, match=err_msg): rusboost.fit(*imbalanced_dataset) diff --git a/imblearn/metrics/tests/test_classification.py b/imblearn/metrics/tests/test_classification.py index 47a37a5b8..29d8775f9 100644 --- a/imblearn/metrics/tests/test_classification.py +++ b/imblearn/metrics/tests/test_classification.py @@ -210,7 +210,7 @@ def test_geometric_mean_support_binary(): [0, 1, 2, 0, 1, 2], [0, 2, 1, 0, 0, 1], 0.001, - (0.001 ** 2) ** (1 / 3), + (0.001**2) ** (1 / 3), ), ([0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5], 0.001, 1), ([0, 1, 1, 1, 1, 0], [0, 0, 1, 1, 1, 1], 0.001, (0.5 * 0.75) ** 0.5), diff --git a/imblearn/over_sampling/_smote/cluster.py b/imblearn/over_sampling/_smote/cluster.py index c18e9b7db..a938d31db 100644 --- a/imblearn/over_sampling/_smote/cluster.py +++ b/imblearn/over_sampling/_smote/cluster.py @@ -198,7 +198,7 @@ def _find_cluster_sparsity(self, X): if self.density_exponent == "auto" else self.density_exponent ) - return (mean_distance ** exponent) / X.shape[0] + return (mean_distance**exponent) / X.shape[0] def _fit_resample(self, X, y): self._validate_estimator() diff --git a/imblearn/pipeline.py 
b/imblearn/pipeline.py index 5e8e2ad13..b112a0a20 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -15,7 +15,7 @@ from sklearn import pipeline from sklearn.base import clone from sklearn.utils import _print_elapsed_time -from sklearn.utils.metaestimators import if_delegate_has_method +from sklearn.utils.metaestimators import available_if from sklearn.utils.validation import check_memory __all__ = ["Pipeline", "make_pipeline"] @@ -352,7 +352,7 @@ def fit_resample(self, X, y=None, **fit_params): if hasattr(last_step, "fit_resample"): return last_step.fit_resample(Xt, yt, **fit_params_last_step) - @if_delegate_has_method(delegate="_final_estimator") + @available_if(pipeline._final_estimator_has("fit_predict")) def fit_predict(self, X, y=None, **fit_params): """Apply `fit_predict` of last step in pipeline after transforms. diff --git a/imblearn/tests/test_pipeline.py b/imblearn/tests/test_pipeline.py index e4fedcccc..6a3a80462 100644 --- a/imblearn/tests/test_pipeline.py +++ b/imblearn/tests/test_pipeline.py @@ -200,11 +200,13 @@ def test_pipeline_init(): Pipeline() # Check that we can't instantiate pipelines with objects without fit # method + X, y = load_iris(return_X_y=True) error_regex = ( "Last step of Pipeline should implement fit or be the string 'passthrough'" ) with raises(TypeError, match=error_regex): - Pipeline([("clf", NoFit())]) + model = Pipeline([("clf", NoFit())]) + model.fit(X, y) # Smoke test with only an estimator clf = NoTrans() pipe = Pipeline([("svc", clf)]) @@ -227,7 +229,8 @@ def test_pipeline_init(): # Note that NoTrans implements fit, but not transform error_regex = "implement fit and transform or fit_resample" with raises(TypeError, match=error_regex): - Pipeline([("t", NoTrans()), ("svc", clf)]) + model = Pipeline([("t", NoTrans()), ("svc", clf)]) + model.fit(X, y) # Check that params are set pipe.set_params(svc__C=0.1) @@ -1074,7 +1077,8 @@ def test_pipeline_with_step_that_implements_both_sample_and_transform(): clf = 
LogisticRegression(solver="lbfgs") with raises(TypeError): - Pipeline([("step", FitTransformSample()), ("logistic", clf)]) + pipeline = Pipeline([("step", FitTransformSample()), ("logistic", clf)]) + pipeline.fit(X, y) def test_pipeline_with_step_that_it_is_pipeline(): @@ -1097,7 +1101,8 @@ def test_pipeline_with_step_that_it_is_pipeline(): filter1 = SelectKBest(f_classif, k=2) pipe1 = Pipeline([("rus", rus), ("anova", filter1)]) with raises(TypeError): - Pipeline([("pipe1", pipe1), ("logistic", clf)]) + pipe2 = Pipeline([("pipe1", pipe1), ("logistic", clf)]) + pipe2.fit(X, y) def test_pipeline_fit_then_sample_with_sampler_last_estimator(): diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py index 34751d044..086de2e74 100644 --- a/imblearn/utils/estimator_checks.py +++ b/imblearn/utils/estimator_checks.py @@ -18,7 +18,7 @@ from sklearn.base import clone from sklearn.datasets import ( - fetch_openml, + load_iris, make_classification, make_multilabel_classification, ) # noqa @@ -446,19 +446,21 @@ def check_classifier_on_multilabel_or_multioutput_targets(name, estimator_orig): def check_classifiers_with_encoded_labels(name, classifier_orig): # Non-regression test for #709 # https://github.com/scikit-learn-contrib/imbalanced-learn/issues/709 - pytest.importorskip("pandas") + pd = pytest.importorskip("pandas") classifier = clone(classifier_orig) - df, y = fetch_openml("iris", version=1, as_frame=True, return_X_y=True) + iris = load_iris(as_frame=True) + df, y = iris.data, iris.target + y = pd.Series(iris.target_names[iris.target], dtype="category") df, y = make_imbalance( df, y, sampling_strategy={ - "Iris-setosa": 30, - "Iris-versicolor": 20, - "Iris-virginica": 50, + "setosa": 30, + "versicolor": 20, + "virginica": 50, }, ) - classifier.set_params(sampling_strategy={"Iris-setosa": 20, "Iris-virginica": 20}) + classifier.set_params(sampling_strategy={"setosa": 20, "virginica": 20}) classifier.fit(df, y) assert set(classifier.classes_) 
== set(y.cat.categories.tolist()) y_pred = classifier.predict(df) diff --git a/pyproject.toml b/pyproject.toml index c4d08ad7e..641894a8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,3 @@ [tool.black] -target-version = ['py36'] -include = '\.pyi?$' \ No newline at end of file +target-version = ['py38'] +include = '\.pyi?$' From de4bf669c5a2e0edc5a52da3105d9fb6d64ddc46 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 16 May 2022 20:40:16 +0200 Subject: [PATCH 3/5] DOC add whats new 0.9.1 --- doc/whats_new/v0.9.rst | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.9.rst b/doc/whats_new/v0.9.rst index a7e65552e..d5c7d08a1 100644 --- a/doc/whats_new/v0.9.rst +++ b/doc/whats_new/v0.9.rst @@ -1,10 +1,19 @@ .. _changes_0_9: +Version 0.9.1 +============= + +Changelog +--------- + +This release provides fixes that make `imbalanced-learn` works with the +latest release (`1.1.0`) of `scikit-learn`. + Version 0.9.0 ============= Changelog --------- -This release is mainly providing fixes that make `imbalaned-learn` works -with the latest release (`1.0.2`) of scikit-learn. +This release is mainly providing fixes that make `imbalanced-learn` works +with the latest release (`1.0.2`) of `scikit-learn`. 
From 18d512363c87cbab97f9cddbb0b8cf50033f92f0 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 16 May 2022 20:41:55 +0200 Subject: [PATCH 4/5] REL make 0.9.1 release --- imblearn/_version.py | 2 +- setup.cfg | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/imblearn/_version.py b/imblearn/_version.py index b25b802af..c39c64826 100644 --- a/imblearn/_version.py +++ b/imblearn/_version.py @@ -22,4 +22,4 @@ # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.9.0" +__version__ = "0.9.1" diff --git a/setup.cfg b/setup.cfg index 3f38ce854..eb8573b0b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,14 +1,14 @@ [bumpversion] -current_version = 0.9.0 +current_version = 0.9.1 tag = False parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<dev>\d+))? -serialize = +serialize = {major}.{minor}.{patch}.{release}{dev} {major}.{minor}.{patch} [bumpversion:part:release] optional_value = gamma -values = +values = dev gamma @@ -22,20 +22,19 @@ test = pytest [tool:pytest] doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS testpaths = imblearn -addopts = +addopts = --doctest-modules --color=yes -rs -filterwarnings = +filterwarnings = ignore:the matrix subclass:PendingDeprecationWarning [flake8] max-line-length = 88 ignore = E121,E123,E126,E226,E24,E704,W503,W504,E203 -per-file-ignores = +per-file-ignores = examples/*: E402 [mypy] ignore_missing_imports = True allow_redefinition = True - From 9f8830e13207ddf85596831593da0183b36cd1fb Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 17 May 2022 08:52:13 +0200 Subject: [PATCH 5/5] [doc build]