[feat] enable the n_jobs for mutual info regression and classifier #21409

Status: Open. Wants to merge 6 commits into base: main.
doc/whats_new/v1.1.rst (9 additions, 0 deletions)

@@ -68,6 +68,15 @@ Changelog
error when 'min_df' or 'max_df' are floating-point numbers greater than 1.
:pr:`20752` by :user:`Alek Lefebvre <AlekLefebvre>`.

:mod:`sklearn.feature_selection`
.................................

- |Enhancement| Added an ``n_jobs`` parameter to
  :func:`feature_selection.mutual_info_regression` and
  :func:`feature_selection.mutual_info_classif` to allow parallel
  processing of the nearest neighbors searches.
:pr:`21409` by :user:`Bingo Li <Bingoko>`.

:mod:`sklearn.impute`
.....................

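For orientation, here is a quick usage sketch of the new parameter. It assumes this PR's branch is installed (released scikit-learn does not accept n_jobs in these functions), and the data below is illustrative only.

import numpy as np
from sklearn.feature_selection import mutual_info_regression

rng = np.random.RandomState(0)
X = rng.rand(1000, 20)
y = X[:, 0] + 0.1 * rng.randn(1000)  # only feature 0 is informative

# n_jobs=-1 uses all processors for the underlying neighbors searches.
mi = mutual_info_regression(X, y, n_neighbors=3, n_jobs=-1, random_state=0)
print(int(np.argmax(mi)))  # expected: 0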
sklearn/feature_selection/_mutual_info.py (76 additions, 13 deletions)

@@ -14,7 +14,7 @@
from ..utils.multiclass import check_classification_targets


def _compute_mi_cc(x, y, n_neighbors):
def _compute_mi_cc(x, y, n_neighbors, n_jobs):
"""Compute mutual information between two continuous variables.

Parameters
@@ -26,6 +26,12 @@ def _compute_mi_cc(x, y, n_neighbors):
n_neighbors : int
Number of nearest neighbors to search for each point, see [1]_.

n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.

Returns
-------
mi : float
@@ -51,7 +57,7 @@ def _compute_mi_cc(x, y, n_neighbors):
xy = np.hstack((x, y))

# Here we rely on NearestNeighbors to select the fastest algorithm.
nn = NearestNeighbors(metric="chebyshev", n_neighbors=n_neighbors)
nn = NearestNeighbors(metric="chebyshev", n_neighbors=n_neighbors, n_jobs=n_jobs)

nn.fit(xy)
radius = nn.kneighbors()[0]
@@ -77,7 +83,7 @@ def _compute_mi_cc(x, y, n_neighbors):
return max(0, mi)
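For context, _compute_mi_cc implements the Kraskov-Stögbauer-Grassberger (KSG) k-NN estimator, and n_jobs is simply forwarded to the neighbors searches. Below is a minimal self-contained sketch of the same idea; the helper name ksg_mi_sketch is hypothetical and the numerics are simplified relative to the actual implementation.

import numpy as np
from scipy.special import digamma
from sklearn.neighbors import KDTree, NearestNeighbors

def ksg_mi_sketch(x, y, n_neighbors=3, n_jobs=None):
    x, y = x.reshape(-1, 1), y.reshape(-1, 1)
    n_samples = len(x)
    # Distance to the k-th neighbor in the joint (x, y) space, using the
    # Chebyshev (max-coordinate) metric as in the KSG paper.
    nn = NearestNeighbors(metric="chebyshev", n_neighbors=n_neighbors, n_jobs=n_jobs)
    nn.fit(np.hstack((x, y)))
    radius = np.nextafter(nn.kneighbors()[0][:, -1], 0)
    # Count marginal neighbors strictly inside that radius (minus self).
    nx = KDTree(x, metric="chebyshev").query_radius(x, radius, count_only=True) - 1
    ny = KDTree(y, metric="chebyshev").query_radius(y, radius, count_only=True) - 1
    mi = (digamma(n_samples) + digamma(n_neighbors)
          - np.mean(digamma(nx + 1)) - np.mean(digamma(ny + 1)))
    return max(0.0, mi)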


def _compute_mi_cd(c, d, n_neighbors):
def _compute_mi_cd(c, d, n_neighbors, n_jobs):
"""Compute mutual information between continuous and discrete variables.

Parameters
@@ -91,6 +97,12 @@ def _compute_mi_cd(c, d, n_neighbors):
n_neighbors : int
Number of nearest neighbors to search for each point, see [1]_.

n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.

Returns
-------
mi : float
@@ -115,7 +127,7 @@ def _compute_mi_cd(c, d, n_neighbors):
radius = np.empty(n_samples)
label_counts = np.empty(n_samples)
k_all = np.empty(n_samples)
nn = NearestNeighbors()
nn = NearestNeighbors(n_jobs=n_jobs)
for label in np.unique(d):
mask = d == label
count = np.sum(mask)
@@ -150,7 +162,7 @@ def _compute_mi_cd(c, d, n_neighbors):
return max(0, mi)


def _compute_mi(x, y, x_discrete, y_discrete, n_neighbors=3):
def _compute_mi(x, y, x_discrete, y_discrete, n_neighbors=3, n_jobs=None):
"""Compute mutual information between two variables.

This is a simple wrapper which selects a proper function to call based on
@@ -159,11 +171,11 @@ def _compute_mi(x, y, x_discrete, y_discrete, n_neighbors=3):
if x_discrete and y_discrete:
return mutual_info_score(x, y)
elif x_discrete and not y_discrete:
return _compute_mi_cd(y, x, n_neighbors)
return _compute_mi_cd(y, x, n_neighbors, n_jobs)
elif not x_discrete and y_discrete:
return _compute_mi_cd(x, y, n_neighbors)
return _compute_mi_cd(x, y, n_neighbors, n_jobs)
else:
return _compute_mi_cc(x, y, n_neighbors)
return _compute_mi_cc(x, y, n_neighbors, n_jobs)


def _iterate_columns(X, columns=None):
@@ -202,6 +214,7 @@ def _estimate_mi(
discrete_features="auto",
discrete_target=False,
n_neighbors=3,
n_jobs=None,
copy=True,
random_state=None,
):
@@ -230,6 +243,12 @@
see [1]_ and [2]_. Higher values reduce variance of the estimation, but
could introduce a bias.

n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.

copy : bool, default=True
Whether to make a copy of the given data. If set to False, the initial
data will be overwritten.
@@ -298,15 +317,22 @@ def _estimate_mi(
y += 1e-10 * np.maximum(1, np.mean(np.abs(y))) * rng.randn(n_samples)

mi = [
_compute_mi(x, y, discrete_feature, discrete_target, n_neighbors)
_compute_mi(x, y, discrete_feature, discrete_target, n_neighbors, n_jobs)
for x, discrete_feature in zip(_iterate_columns(X), discrete_mask)
]

return np.array(mi)
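Note that the per-feature loop above stays sequential; the new parallelism lives inside each neighbors query. A hypothetical alternative, not what this PR does, would be to parallelize across features with joblib:

from joblib import Parallel, delayed

# Illustrative only: feature-level parallelism instead of (or on top of)
# the per-search n_jobs that this PR threads through.
mi = Parallel(n_jobs=n_jobs)(
    delayed(_compute_mi)(x, y, discrete_feature, discrete_target, n_neighbors)
    for x, discrete_feature in zip(_iterate_columns(X), discrete_mask)
)
return np.array(mi)

The two approaches could in principle be combined, though nesting joblib workers over parallel searches needs care to avoid oversubscription.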


def mutual_info_regression(
X, y, *, discrete_features="auto", n_neighbors=3, copy=True, random_state=None
X,
y,
*,
discrete_features="auto",
n_neighbors=3,
n_jobs=None,
copy=True,
random_state=None,
):
"""Estimate mutual information for a continuous target variable.

@@ -342,6 +368,12 @@ def mutual_info_regression(
see [2]_ and [3]_. Higher values reduce variance of the estimation, but
could introduce a bias.

n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.

copy : bool, default=True
Whether to make a copy of the given data. If set to False, the initial
data will be overwritten.
@@ -381,11 +413,27 @@ def mutual_info_regression(
.. [4] L. F. Kozachenko, N. N. Leonenko, "Sample Estimate of the Entropy
of a Random Vector", Probl. Peredachi Inf., 23:2 (1987), 9-16
"""
return _estimate_mi(X, y, discrete_features, False, n_neighbors, copy, random_state)
return _estimate_mi(
X=X,
y=y,
discrete_features=discrete_features,
discrete_target=False,
n_neighbors=n_neighbors,
n_jobs=n_jobs,
copy=copy,
random_state=random_state,
)


def mutual_info_classif(
X, y, *, discrete_features="auto", n_neighbors=3, copy=True, random_state=None
X,
y,
*,
discrete_features="auto",
n_neighbors=3,
n_jobs=None,
copy=True,
random_state=None,
):
"""Estimate mutual information for a discrete target variable.

@@ -421,6 +469,12 @@ def mutual_info_classif(
see [2]_ and [3]_. Higher values reduce variance of the estimation, but
could introduce a bias.

n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.

copy : bool, default=True
Whether to make a copy of the given data. If set to False, the initial
data will be overwritten.
@@ -461,4 +515,13 @@ def mutual_info_classif(
of a Random Vector", Probl. Peredachi Inf., 23:2 (1987), 9-16
"""
check_classification_targets(y)
return _estimate_mi(X, y, discrete_features, True, n_neighbors, copy, random_state)
return _estimate_mi(
X=X,
y=y,
discrete_features=discrete_features,
discrete_target=True,
n_neighbors=n_neighbors,
n_jobs=n_jobs,
copy=copy,
random_state=random_state,
)
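A matching usage sketch for the classification variant (again assuming this branch is installed; the dataset is illustrative):

from sklearn.datasets import make_classification
from sklearn.feature_selection import mutual_info_classif

X, y = make_classification(
    n_samples=500, n_features=10, n_informative=3, random_state=0
)
# Two workers for the neighbors searches behind the MI estimates.
mi = mutual_info_classif(X, y, n_neighbors=3, n_jobs=2, random_state=0)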
sklearn/feature_selection/tests/test_mutual_info.py (36 additions, 0 deletions)

@@ -19,6 +19,7 @@ def test_compute_mi_dd():
I_xy = H_x + H_y - H_xy

assert_almost_equal(_compute_mi(x, y, True, True), I_xy)
assert_almost_equal(_compute_mi(x, y, True, True, n_jobs=2), I_xy)


def test_compute_mi_cc():
@@ -53,6 +54,10 @@ def test_compute_mi_cc():
I_computed = _compute_mi(x, y, False, False, n_neighbors)
assert_almost_equal(I_computed, I_theory, 1)

for n_neighbors in [3, 5, 7]:
I_computed_ = _compute_mi(x, y, False, False, n_neighbors, n_jobs=2)
assert_almost_equal(I_computed_, I_theory, 1)


def test_compute_mi_cd():
# To test define a joint distribution as follows:
@@ -90,6 +95,10 @@ def test_compute_mi_cd():
I_computed = _compute_mi(x, y, True, False, n_neighbors)
assert_almost_equal(I_computed, I_theory, 1)

for n_neighbors in [3, 5, 7]:
I_computed_ = _compute_mi(x, y, True, False, n_neighbors, n_jobs=2)
assert_almost_equal(I_computed_, I_theory, 1)


def test_compute_mi_cd_unique_label():
# Test that adding unique label doesn't change MI.
@@ -102,12 +111,15 @@ def test_compute_mi_cd_unique_label():
y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask))

mi_1 = _compute_mi(x, y, True, False)
mi_1_ = _compute_mi(x, y, True, False, n_jobs=2)

x = np.hstack((x, 2))
y = np.hstack((y, 10))
mi_2 = _compute_mi(x, y, True, False)
mi_2_ = _compute_mi(x, y, True, False, n_jobs=2)

assert mi_1 == mi_2
assert mi_1_ == mi_2_


# We are going to test that feature ordering by MI matches our expectations.
@@ -139,6 +151,9 @@ def test_mutual_info_regression():
mi = mutual_info_regression(X, y, random_state=0)
assert_array_equal(np.argsort(-mi), np.array([1, 2, 0]))

mi_ = mutual_info_regression(X, y, random_state=0, n_jobs=2)
assert_array_equal(np.argsort(-mi_), np.array([1, 2, 0]))


def test_mutual_info_classif_mixed():
# Here the target is discrete and there are two continuous and one
@@ -163,6 +178,27 @@ def test_mutual_info_classif_mixed():
# The MI should be the same
assert mi_nn[2] == mi[2]

mi_ = mutual_info_classif(
X, y, discrete_features=[2], n_neighbors=3, random_state=0, n_jobs=2
)
assert_array_equal(np.argsort(-mi_), [2, 0, 1])
for n_neighbors in [5, 7, 9]:
mi_nn_ = mutual_info_classif(
X,
y,
discrete_features=[2],
n_neighbors=n_neighbors,
random_state=0,
n_jobs=2,
)
# Check that the continuous values have a higher MI with greater
# n_neighbors
assert mi_nn_[0] > mi_[0]
assert mi_nn_[1] > mi_[1]
# The n_neighbors should not have any effect on the discrete value
# The MI should be the same
assert mi_nn_[2] == mi_[2]
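A possible consolidation for these tests (a reviewer sketch, not part of this PR): parametrize over n_jobs and assert that the estimates are invariant, since n_jobs should only affect runtime. The test name and data below are illustrative.

import numpy as np
import pytest
from numpy.testing import assert_allclose
from sklearn.feature_selection import mutual_info_regression

@pytest.mark.parametrize("n_jobs", [None, 1, 2, -1])
def test_mutual_info_regression_n_jobs_invariance(n_jobs):
    # n_jobs must change only the runtime, never the estimates.
    rng = np.random.RandomState(0)
    X = rng.rand(100, 3)
    y = X[:, 0] + 0.1 * rng.randn(100)
    ref = mutual_info_regression(X, y, random_state=0)
    mi = mutual_info_regression(X, y, random_state=0, n_jobs=n_jobs)
    assert_allclose(mi, ref)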


def test_mutual_info_options():
X = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=float)