Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 5d2107e

Browse files
authored
DOC document and deprecate missing attributes in MiniBatchKMeans (#17864)
1 parent 89e49b6 commit 5d2107e
Copy full SHA for 5d2107e

File tree

4 files changed

+67
-21
lines changed
Filter options

4 files changed

+67
-21
lines changed

‎doc/whats_new/v0.24.rst

Copy file name to clipboardExpand all lines: doc/whats_new/v0.24.rst
+5-1Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Changelog
5454
:user:`Lucy Liu <lucyleeow>`.
5555

5656
:mod:`sklearn.cluster`
57-
.......................
57+
......................
5858

5959
- |Fix| Fixed a bug in :class:`cluster.MeanShift` with `bin_seeding=True`. When
6060
the estimated bandwidth is 0, the behavior is equivalent to
@@ -66,6 +66,10 @@ Changelog
6666
weighted by the sample weights. :pr:`17848` by
6767
:user:`Jérémie du Boisberranger <jeremiedbb>`.
6868

69+
- |API| :class:`cluster.MiniBatchKMeans` attributes, `counts_` and
70+
`init_size_`, are deprecated and will be removed in 0.26. :pr:`17864` by
71+
:user:`Jérémie du Boisberranger <jeremiedbb>`.
72+
6973
:mod:`sklearn.covariance`
7074
.........................
7175

‎sklearn/cluster/_kmeans.py

Copy file name to clipboardExpand all lines: sklearn/cluster/_kmeans.py
+44-12Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from ..utils import check_array
2727
from ..utils import gen_batches
2828
from ..utils import check_random_state
29+
from ..utils import deprecated
2930
from ..utils.validation import check_is_fitted, _check_sample_weight
3031
from ..utils._openmp_helpers import _openmp_effective_n_threads
3132
from ..exceptions import ConvergenceWarning
@@ -1531,6 +1532,21 @@ class MiniBatchKMeans(KMeans):
15311532
defined as the sum of square distances of samples to their nearest
15321533
neighbor.
15331534
1535+
n_iter_ : int
1536+
Number of batches processed.
1537+
1538+
counts_ : ndarray of shape (n_clusters,)
1539+
Weight sum of each cluster.
1540+
1541+
.. deprecated:: 0.24
1542+
This attribute is deprecated in 0.24 and will be removed in 0.26.
1543+
1544+
init_size_ : int
1545+
The effective number of samples used for the initialization.
1546+
1547+
.. deprecated:: 0.24
1548+
This attribute is deprecated in 0.24 and will be removed in 0.26.
1549+
15341550
See Also
15351551
--------
15361552
KMeans
@@ -1588,6 +1604,24 @@ def __init__(self, n_clusters=8, *, init='k-means++', max_iter=100,
15881604
self.init_size = init_size
15891605
self.reassignment_ratio = reassignment_ratio
15901606

1607+
@deprecated("The attribute 'counts_' is deprecated in 0.24" # type: ignore
1608+
" and will be removed in 0.26.")
1609+
@property
1610+
def counts_(self):
1611+
return self._counts
1612+
1613+
@deprecated("The attribute 'init_size_' is deprecated in " # type: ignore
1614+
"0.24 and will be removed in 0.26.")
1615+
@property
1616+
def init_size_(self):
1617+
return self._init_size
1618+
1619+
@deprecated("The attribute 'random_state_' is deprecated " # type: ignore
1620+
"in 0.24 and will be removed in 0.26.")
1621+
@property
1622+
def random_state_(self):
1623+
return getattr(self, "_random_state", None)
1624+
15911625
def _check_params(self, X):
15921626
super()._check_params(X)
15931627

@@ -1619,8 +1653,6 @@ def _check_params(self, X):
16191653
RuntimeWarning, stacklevel=2)
16201654
self._init_size = 3 * self.n_clusters
16211655
self._init_size = min(self._init_size, X.shape[0])
1622-
# FIXME: init_size_ will be deprecated and this line will be removed
1623-
self.init_size_ = self._init_size
16241656

16251657
# reassignment_ratio
16261658
if self.reassignment_ratio < 0:
@@ -1727,7 +1759,7 @@ def fit(self, X, y=None, sample_weight=None):
17271759
% (init_idx + 1, self._n_init, inertia))
17281760
if best_inertia is None or inertia < best_inertia:
17291761
self.cluster_centers_ = cluster_centers
1730-
self.counts_ = weight_sums
1762+
self._counts = weight_sums
17311763
best_inertia = inertia
17321764

17331765
# Empty context to be used inplace by the convergence check routine
@@ -1744,15 +1776,15 @@ def fit(self, X, y=None, sample_weight=None):
17441776
batch_inertia, centers_squared_diff = _mini_batch_step(
17451777
X[minibatch_indices], sample_weight[minibatch_indices],
17461778
x_squared_norms[minibatch_indices],
1747-
self.cluster_centers_, self.counts_,
1779+
self.cluster_centers_, self._counts,
17481780
old_center_buffer, tol > 0.0, distances=distances,
17491781
# Here we randomly choose whether to perform
17501782
# random reassignment: the choice is done as a function
17511783
# of the iteration index, and the minimum number of
17521784
# counts, in order to force this reassignment to happen
17531785
# every once in a while
17541786
random_reassign=((iteration_idx + 1)
1755-
% (10 + int(self.counts_.min())) == 0),
1787+
% (10 + int(self._counts.min())) == 0),
17561788
random_state=random_state,
17571789
reassignment_ratio=self.reassignment_ratio,
17581790
verbose=self.verbose)
@@ -1831,7 +1863,7 @@ def partial_fit(self, X, y=None, sample_weight=None):
18311863
order='C', accept_large_sparse=False,
18321864
reset=is_first_call_to_partial_fit)
18331865

1834-
self.random_state_ = getattr(self, "random_state_",
1866+
self._random_state = getattr(self, "_random_state",
18351867
check_random_state(self.random_state))
18361868
sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
18371869

@@ -1850,26 +1882,26 @@ def partial_fit(self, X, y=None, sample_weight=None):
18501882
# initialize the cluster centers
18511883
self.cluster_centers_ = _init_centroids(
18521884
X, self.n_clusters, init,
1853-
random_state=self.random_state_,
1885+
random_state=self._random_state,
18541886
x_squared_norms=x_squared_norms, init_size=self.init_size)
18551887

1856-
self.counts_ = np.zeros(self.n_clusters,
1888+
self._counts = np.zeros(self.n_clusters,
18571889
dtype=sample_weight.dtype)
18581890
random_reassign = False
18591891
distances = None
18601892
else:
18611893
# The lower the minimum count is, the more we do random
18621894
# reassignment, however, we don't want to do random
18631895
# reassignment too often, to allow for building up counts
1864-
random_reassign = self.random_state_.randint(
1865-
10 * (1 + self.counts_.min())) == 0
1896+
random_reassign = self._random_state.randint(
1897+
10 * (1 + self._counts.min())) == 0
18661898
distances = np.zeros(X.shape[0], dtype=X.dtype)
18671899

18681900
_mini_batch_step(X, sample_weight, x_squared_norms,
1869-
self.cluster_centers_, self.counts_,
1901+
self.cluster_centers_, self._counts,
18701902
np.zeros(0, dtype=X.dtype), 0,
18711903
random_reassign=random_reassign, distances=distances,
1872-
random_state=self.random_state_,
1904+
random_state=self._random_state,
18731905
reassignment_ratio=self.reassignment_ratio,
18741906
verbose=self.verbose)
18751907

‎sklearn/cluster/tests/test_k_means.py

Copy file name to clipboardExpand all lines: sklearn/cluster/tests/test_k_means.py
+16-3Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ def test_minibatch_reassign():
434434
# Turn on verbosity to smoke test the display code
435435
_mini_batch_step(this_X, sample_weight, (X ** 2).sum(axis=1),
436436
mb_k_means.cluster_centers_,
437-
mb_k_means.counts_,
437+
mb_k_means._counts,
438438
np.zeros(X.shape[1], np.double),
439439
False, distances=np.zeros(X.shape[0]),
440440
random_reassign=True, random_state=42,
@@ -454,7 +454,7 @@ def test_minibatch_reassign():
454454
# Turn on verbosity to smoke test the display code
455455
_mini_batch_step(this_X, sample_weight, (X ** 2).sum(axis=1),
456456
mb_k_means.cluster_centers_,
457-
mb_k_means.counts_,
457+
mb_k_means._counts,
458458
np.zeros(X.shape[1], np.double),
459459
False, distances=np.zeros(X.shape[0]),
460460
random_reassign=True, random_state=42,
@@ -529,7 +529,7 @@ def test_minibatch_set_init_size():
529529
init_size=666, random_state=42,
530530
n_init=1).fit(X)
531531
assert mb_k_means.init_size == 666
532-
assert mb_k_means.init_size_ == n_samples
532+
assert mb_k_means._init_size == n_samples
533533
_check_fitted_model(mb_k_means)
534534

535535

@@ -933,6 +933,19 @@ def test_n_jobs_deprecated(n_jobs):
933933
kmeans.fit(X)
934934

935935

936+
@pytest.mark.parametrize("attr", ["counts_", "init_size_", "random_state_"])
937+
def test_minibatch_kmeans_deprecated_attributes(attr):
938+
# check that we raise a deprecation warning when accessing a deprecated attribute
939+
# FIXME: remove in 0.26
940+
depr_msg = (f"The attribute '{attr}' is deprecated in 0.24 and will be "
941+
f"removed in 0.26.")
942+
km = MiniBatchKMeans(n_clusters=2, n_init=1, init='random', random_state=0)
943+
km.fit(X)
944+
945+
with pytest.warns(FutureWarning, match=depr_msg):
946+
getattr(km, attr)
947+
948+
936949
def test_warning_elkan_1_cluster():
937950
X, _ = make_blobs(n_samples=10, n_features=2, centers=1, random_state=0)
938951
kmeans = KMeans(n_clusters=1, n_init=1, init='random', random_state=0,

‎sklearn/tests/test_docstring_parameters.py

Copy file name to clipboardExpand all lines: sklearn/tests/test_docstring_parameters.py
+2-5Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,13 +232,10 @@ def test_fit_docstring_attributes(name, Estimator):
232232
with ignore_warnings(category=FutureWarning):
233233
assert hasattr(est, attr.name)
234234

235-
IGNORED = {'BayesianRidge', 'Birch', 'CCA', 'CategoricalNB',
236-
'KernelCenterer',
235+
IGNORED = {'BayesianRidge', 'Birch', 'CCA',
237236
'LarsCV', 'Lasso', 'LassoLarsIC',
238-
'MiniBatchKMeans',
239237
'OrthogonalMatchingPursuit',
240-
'PLSCanonical', 'PLSSVD',
241-
'PassiveAggressiveClassifier'}
238+
'PLSCanonical', 'PLSSVD'}
242239

243240
if Estimator.__name__ in IGNORED:
244241
pytest.xfail(

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page; visit the original site.