From 0cc02fd9e4f19934fe0d43fd9cd0a089e94312af Mon Sep 17 00:00:00 2001 From: Prakhyath07 Date: Wed, 28 Dec 2022 16:48:26 +0530 Subject: [PATCH 1/4] FIX make sure to accept "minority" as a valid strategy in over-samplers (#964) Co-authored-by: Guillaume Lemaitre --- doc/whats_new/v0.10.rst | 15 +++++++++++++++ imblearn/over_sampling/base.py | 2 +- .../tests/test_random_over_sampler.py | 18 ++++++++++++++++++ .../tests/test_random_under_sampler.py | 18 ++++++++++++++++++ .../tests/test_tomek_links.py | 19 +++++++++++++++++++ 5 files changed, 71 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v0.10.rst b/doc/whats_new/v0.10.rst index ca82c0c0d..708c3657f 100644 --- a/doc/whats_new/v0.10.rst +++ b/doc/whats_new/v0.10.rst @@ -1,5 +1,20 @@ .. _changes_0_10: +Version 0.10.1 +============== + +**December 28, 2022** + +Changelog +--------- + +Bug fixes +......... + +- Fix a regression in over-sampler where the string `minority` was rejected as + an invalid sampling strategy. + :pr:`964` by :user:`Prakhyath Bhandary `. 
+ Version 0.10.0 ============== diff --git a/imblearn/over_sampling/base.py b/imblearn/over_sampling/base.py index d4e4a4541..fbd982bf2 100644 --- a/imblearn/over_sampling/base.py +++ b/imblearn/over_sampling/base.py @@ -61,7 +61,7 @@ class BaseOverSampler(BaseSampler): _parameter_constraints: dict = { "sampling_strategy": [ Interval(numbers.Real, 0, 1, closed="right"), - StrOptions({"auto", "majority", "not minority", "not majority", "all"}), + StrOptions({"auto", "minority", "not minority", "not majority", "all"}), Mapping, callable, ], diff --git a/imblearn/over_sampling/tests/test_random_over_sampler.py b/imblearn/over_sampling/tests/test_random_over_sampler.py index 2db808f5b..b72132d19 100644 --- a/imblearn/over_sampling/tests/test_random_over_sampler.py +++ b/imblearn/over_sampling/tests/test_random_over_sampler.py @@ -7,6 +7,7 @@ import numpy as np import pytest +from sklearn.datasets import make_classification from sklearn.utils._testing import ( _convert_container, assert_allclose, @@ -255,3 +256,20 @@ def test_random_over_sampler_shrinkage_error(data, shrinkage, err_msg): ros = RandomOverSampler(shrinkage=shrinkage) with pytest.raises(ValueError, match=err_msg): ros.fit_resample(X, y) + + +@pytest.mark.parametrize( + "sampling_strategy", ["auto", "minority", "not minority", "not majority", "all"] +) +def test_random_over_sampler_strings(sampling_strategy): + """Check that we support all supposed strings as `sampling_strategy` in + a sampler inheriting from `BaseOverSampler`.""" + + X, y = make_classification( + n_samples=100, + n_clusters_per_class=1, + n_classes=3, + weights=[0.1, 0.3, 0.6], + random_state=0, + ) + RandomOverSampler(sampling_strategy=sampling_strategy).fit_resample(X, y) diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py index 2e845e83a..bcb8682e2 100644 --- 
a/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py @@ -7,6 +7,7 @@ import numpy as np import pytest +from sklearn.datasets import make_classification from sklearn.utils._testing import assert_array_equal from imblearn.under_sampling import RandomUnderSampler @@ -130,3 +131,20 @@ def test_random_under_sampling_nan_inf(): assert y_res.shape == (6,) assert X_res.shape == (6, 2) assert np.any(~np.isfinite(X_res)) + + +@pytest.mark.parametrize( + "sampling_strategy", ["auto", "majority", "not minority", "not majority", "all"] +) +def test_random_under_sampler_strings(sampling_strategy): + """Check that we support all supposed strings as `sampling_strategy` in + a sampler inheriting from `BaseUnderSampler`.""" + + X, y = make_classification( + n_samples=100, + n_clusters_per_class=1, + n_classes=3, + weights=[0.1, 0.3, 0.6], + random_state=0, + ) + RandomUnderSampler(sampling_strategy=sampling_strategy).fit_resample(X, y) diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py b/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py index cd169393c..5fd837866 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py @@ -4,6 +4,8 @@ # License: MIT import numpy as np +import pytest +from sklearn.datasets import make_classification from sklearn.utils._testing import assert_array_equal from imblearn.under_sampling import TomekLinks @@ -68,3 +70,20 @@ def test_tl_fit_resample(): y_gt = np.array([1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0]) assert_array_equal(X_resampled, X_gt) assert_array_equal(y_resampled, y_gt) + + +@pytest.mark.parametrize( + "sampling_strategy", ["auto", "majority", "not minority", "not majority", "all"] +) +def test_tomek_links_strings(sampling_strategy): + """Check that we support all 
supposed strings as `sampling_strategy` in + a sampler inheriting from `BaseCleaningSampler`.""" + + X, y = make_classification( + n_samples=100, + n_clusters_per_class=1, + n_classes=3, + weights=[0.1, 0.3, 0.6], + random_state=0, + ) + TomekLinks(sampling_strategy=sampling_strategy).fit_resample(X, y) From 8632efed48ab4e672e6e4ff26a8db2132df9335d Mon Sep 17 00:00:00 2001 From: EliaSchiavon <85481745+EliaSchiavon@users.noreply.github.com> Date: Wed, 28 Dec 2022 12:20:40 +0100 Subject: [PATCH 2/4] DOC fix typos in an example (#963) minor fixes --- examples/api/plot_sampling_strategy_usage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/api/plot_sampling_strategy_usage.py b/examples/api/plot_sampling_strategy_usage.py index a3beb87d6..b739a41c6 100644 --- a/examples/api/plot_sampling_strategy_usage.py +++ b/examples/api/plot_sampling_strategy_usage.py @@ -91,7 +91,7 @@ _ = ax.set_title("Over-sampling") # %% [markdown] -# `sampling_strategy` has a `str` +# `sampling_strategy` as a `str` # ------------------------------- # # `sampling_strategy` can be given as a string which specify the class @@ -129,7 +129,7 @@ _ = ax.set_title("Cleaning") # %% [markdown] -# `sampling_strategy as a `dict` +# `sampling_strategy` as a `dict` # ------------------------------ # # When `sampling_strategy` is a `dict`, the keys correspond to the targeted From 524ca03053c9eed0e6e18fd76eb00dc15290dc64 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 28 Dec 2022 14:20:34 +0100 Subject: [PATCH 3/4] MAINT pin numpy and scipy version for minimum keras/tensorflow (#965) --- azure-pipelines.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index bd239331a..6971cdf2d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -189,6 +189,8 @@ jobs: DISTRIB: 'conda-minimum-tensorflow' CONDA_CHANNEL: 'conda-forge' PYTHON_VERSION: '3.8' + NUMPY_VERSION: '1.19.5' # This version is the minimum required by 
tensorflow + SCIPY_VERSION: 'min' SKLEARN_VERSION: 'min' TENSORFLOW_VERSION: 'min' TEST_DOCS: 'true' @@ -213,6 +215,8 @@ jobs: DISTRIB: 'conda-minimum-keras' CONDA_CHANNEL: 'conda-forge' PYTHON_VERSION: '3.8' + NUMPY_VERSION: '1.19.5' # This version is the minimum required by tensorflow + SCIPY_VERSION: 'min' SKLEARN_VERSION: 'min' KERAS_VERSION: 'min' TEST_DOCS: 'true' From 64fc1ca13d429c08fb4a4c6dedc9b99b1c4e233c Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 28 Dec 2022 14:25:48 +0100 Subject: [PATCH 4/4] REL bumpversion to 0.10.1 --- imblearn/_version.py | 2 +- setup.cfg | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/imblearn/_version.py b/imblearn/_version.py index e67ae5b4f..a4fdd70ca 100644 --- a/imblearn/_version.py +++ b/imblearn/_version.py @@ -22,4 +22,4 @@ # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.10.0" +__version__ = "0.10.1" diff --git a/setup.cfg b/setup.cfg index 58df5fbe5..7c4181bca 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,14 +1,14 @@ [bumpversion] -current_version = 0.10.0 +current_version = 0.10.1 tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? -serialize = +serialize = {major}.{minor}.{patch}.{release}{dev} {major}.{minor}.{patch} [bumpversion:part:release] optional_value = gamma -values = +values = dev gamma @@ -22,17 +22,17 @@ test = pytest [tool:pytest] doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS testpaths = imblearn -addopts = +addopts = --doctest-modules --color=yes -rs -filterwarnings = +filterwarnings = ignore:the matrix subclass:PendingDeprecationWarning [flake8] max-line-length = 88 target-version = ['py37'] -ignore = +ignore = E24, E121, E123, @@ -44,17 +44,18 @@ ignore = E741, W503, W504 -exclude = +exclude = .git, __pycache__, dist, doc/_build, doc/auto_examples, build, -per-file-ignores = +per-file-ignores = examples/*: E402 doc/conf.py: E402 [mypy] ignore_missing_imports = True allow_redefinition = True +