26
26
from ..utils import check_array
27
27
from ..utils import gen_batches
28
28
from ..utils import check_random_state
29
+ from ..utils import deprecated
29
30
from ..utils .validation import check_is_fitted , _check_sample_weight
30
31
from ..utils ._openmp_helpers import _openmp_effective_n_threads
31
32
from ..exceptions import ConvergenceWarning
@@ -1531,6 +1532,21 @@ class MiniBatchKMeans(KMeans):
1531
1532
defined as the sum of square distances of samples to their nearest
1532
1533
neighbor.
1533
1534
1535
+ n_iter_ : int
1536
+ Number of batches processed.
1537
+
1538
+ counts_ : ndarray of shape (n_clusters,)
1539
+ Weight sum of each cluster.
1540
+
1541
+ .. deprecated:: 0.24
1542
+ This attribute is deprecated in 0.24 and will be removed in 0.26.
1543
+
1544
+ init_size_ : int
1545
+ The effective number of samples used for the initialization.
1546
+
1547
+ .. deprecated:: 0.24
1548
+ This attribute is deprecated in 0.24 and will be removed in 0.26.
1549
+
1534
1550
See Also
1535
1551
--------
1536
1552
KMeans
@@ -1588,6 +1604,24 @@ def __init__(self, n_clusters=8, *, init='k-means++', max_iter=100,
1588
1604
self .init_size = init_size
1589
1605
self .reassignment_ratio = reassignment_ratio
1590
1606
1607
@deprecated(  # type: ignore
    "The attribute 'counts_' is deprecated in 0.24 and "
    "will be removed in 0.26."
)
@property
def counts_(self):
    """Deprecated read-only alias for the private ``_counts`` attribute.

    ``_counts`` is assigned by ``fit`` (from the per-cluster weight sums)
    and by ``partial_fit``; accessing this property goes through the
    ``deprecated`` decorator applied above.
    """
    weight_sums = self._counts
    return weight_sums
1612
+
1613
@deprecated(  # type: ignore
    "The attribute 'init_size_' is deprecated in 0.24 and "
    "will be removed in 0.26."
)
@property
def init_size_(self):
    """Deprecated read-only alias for the private ``_init_size`` attribute.

    ``_init_size`` is computed in ``_check_params`` and capped at the
    number of rows of ``X``.
    """
    effective_init_size = self._init_size
    return effective_init_size
1618
+
1619
@deprecated(  # type: ignore
    "The attribute 'random_state_' is deprecated in 0.24 and "
    "will be removed in 0.26."
)
@property
def random_state_(self):
    """Deprecated read-only alias for the private ``_random_state``.

    Returns ``None`` instead of raising when ``_random_state`` has not
    been set on the instance yet.
    """
    rng = getattr(self, "_random_state", None)
    return rng
1624
+
1591
1625
def _check_params (self , X ):
1592
1626
super ()._check_params (X )
1593
1627
@@ -1619,8 +1653,6 @@ def _check_params(self, X):
1619
1653
RuntimeWarning , stacklevel = 2 )
1620
1654
self ._init_size = 3 * self .n_clusters
1621
1655
self ._init_size = min (self ._init_size , X .shape [0 ])
1622
- # FIXME: init_size_ will be deprecated and this line will be removed
1623
- self .init_size_ = self ._init_size
1624
1656
1625
1657
# reassignment_ratio
1626
1658
if self .reassignment_ratio < 0 :
@@ -1727,7 +1759,7 @@ def fit(self, X, y=None, sample_weight=None):
1727
1759
% (init_idx + 1 , self ._n_init , inertia ))
1728
1760
if best_inertia is None or inertia < best_inertia :
1729
1761
self .cluster_centers_ = cluster_centers
1730
- self .counts_ = weight_sums
1762
+ self ._counts = weight_sums
1731
1763
best_inertia = inertia
1732
1764
1733
1765
# Empty context to be used inplace by the convergence check routine
@@ -1744,15 +1776,15 @@ def fit(self, X, y=None, sample_weight=None):
1744
1776
batch_inertia , centers_squared_diff = _mini_batch_step (
1745
1777
X [minibatch_indices ], sample_weight [minibatch_indices ],
1746
1778
x_squared_norms [minibatch_indices ],
1747
- self .cluster_centers_ , self .counts_ ,
1779
+ self .cluster_centers_ , self ._counts ,
1748
1780
old_center_buffer , tol > 0.0 , distances = distances ,
1749
1781
# Here we randomly choose whether to perform
1750
1782
# random reassignment: the choice is done as a function
1751
1783
# of the iteration index, and the minimum number of
1752
1784
# counts, in order to force this reassignment to happen
1753
1785
# every once in a while
1754
1786
random_reassign = ((iteration_idx + 1 )
1755
- % (10 + int (self .counts_ .min ())) == 0 ),
1787
+ % (10 + int (self ._counts .min ())) == 0 ),
1756
1788
random_state = random_state ,
1757
1789
reassignment_ratio = self .reassignment_ratio ,
1758
1790
verbose = self .verbose )
@@ -1831,7 +1863,7 @@ def partial_fit(self, X, y=None, sample_weight=None):
1831
1863
order = 'C' , accept_large_sparse = False ,
1832
1864
reset = is_first_call_to_partial_fit )
1833
1865
1834
- self .random_state_ = getattr (self , "random_state_ " ,
1866
+ self ._random_state = getattr (self , "_random_state " ,
1835
1867
check_random_state (self .random_state ))
1836
1868
sample_weight = _check_sample_weight (sample_weight , X , dtype = X .dtype )
1837
1869
@@ -1850,26 +1882,26 @@ def partial_fit(self, X, y=None, sample_weight=None):
1850
1882
# initialize the cluster centers
1851
1883
self .cluster_centers_ = _init_centroids (
1852
1884
X , self .n_clusters , init ,
1853
- random_state = self .random_state_ ,
1885
+ random_state = self ._random_state ,
1854
1886
x_squared_norms = x_squared_norms , init_size = self .init_size )
1855
1887
1856
- self .counts_ = np .zeros (self .n_clusters ,
1888
+ self ._counts = np .zeros (self .n_clusters ,
1857
1889
dtype = sample_weight .dtype )
1858
1890
random_reassign = False
1859
1891
distances = None
1860
1892
else :
1861
1893
# The lower the minimum count is, the more we do random
1862
1894
# reassignment, however, we don't want to do random
1863
1895
# reassignment too often, to allow for building up counts
1864
- random_reassign = self .random_state_ .randint (
1865
- 10 * (1 + self .counts_ .min ())) == 0
1896
+ random_reassign = self ._random_state .randint (
1897
+ 10 * (1 + self ._counts .min ())) == 0
1866
1898
distances = np .zeros (X .shape [0 ], dtype = X .dtype )
1867
1899
1868
1900
_mini_batch_step (X , sample_weight , x_squared_norms ,
1869
- self .cluster_centers_ , self .counts_ ,
1901
+ self .cluster_centers_ , self ._counts ,
1870
1902
np .zeros (0 , dtype = X .dtype ), 0 ,
1871
1903
random_reassign = random_reassign , distances = distances ,
1872
- random_state = self .random_state_ ,
1904
+ random_state = self ._random_state ,
1873
1905
reassignment_ratio = self .reassignment_ratio ,
1874
1906
verbose = self .verbose )
1875
1907
0 commit comments