@@ -139,7 +139,6 @@ def discretize(
139
139
# If there is an exception we try to randomize and rerun SVD again
140
140
# (at most max_svd_restarts times, stopping early once converged).
141
141
while (svd_restarts < max_svd_restarts ) and not has_converged :
142
-
143
142
# Initialize first column of rotation matrix with a row of the
144
143
# eigenvectors
145
144
rotation = np .zeros ((n_components , n_components ))
@@ -345,50 +344,20 @@ def spectral_clustering(
345
344
David Zhuzhunashvili, Andrew Knyazev
346
345
<10.1109/HPEC.2017.8091045>`
347
346
"""
348
- if assign_labels not in ("kmeans" , "discretize" , "cluster_qr" ):
349
- raise ValueError (
350
- "The 'assign_labels' parameter should be "
351
- "'kmeans' or 'discretize', or 'cluster_qr', "
352
- f"but { assign_labels !r} was given"
353
- )
354
- if isinstance (affinity , np .matrix ):
355
- raise TypeError (
356
- "spectral_clustering does not support passing in affinity as an "
357
- "np.matrix. Please convert to a numpy array with np.asarray. For "
358
- "more information see: "
359
- "https://numpy.org/doc/stable/reference/generated/numpy.matrix.html" , # noqa
360
- )
361
347
362
- random_state = check_random_state (random_state )
363
- n_components = n_clusters if n_components is None else n_components
364
-
365
- # We now obtain the real valued solution matrix to the
366
- # relaxed Ncut problem, solving the eigenvalue problem
367
- # L_sym x = lambda x and recovering u = D^-1/2 x.
368
- # The first eigenvector is constant only for fully connected graphs
369
- # and should be kept for spectral clustering (drop_first = False)
370
- # See spectral_embedding documentation.
371
- maps = spectral_embedding (
372
- affinity ,
348
+ clusterer = SpectralClustering (
349
+ n_clusters = n_clusters ,
373
350
n_components = n_components ,
374
351
eigen_solver = eigen_solver ,
375
352
random_state = random_state ,
353
+ n_init = n_init ,
354
+ affinity = "precomputed" ,
376
355
eigen_tol = eigen_tol ,
377
- drop_first = False ,
378
- )
379
- if verbose :
380
- print (f"Computing label assignment using { assign_labels } " )
381
-
382
- if assign_labels == "kmeans" :
383
- _ , labels , _ = k_means (
384
- maps , n_clusters , random_state = random_state , n_init = n_init , verbose = verbose
385
- )
386
- elif assign_labels == "cluster_qr" :
387
- labels = cluster_qr (maps )
388
- else :
389
- labels = discretize (maps , random_state = random_state )
356
+ assign_labels = assign_labels ,
357
+ verbose = verbose ,
358
+ ).fit (affinity )
390
359
391
- return labels
360
+ return clusterer . labels_
392
361
393
362
394
363
class SpectralClustering (ClusterMixin , BaseEstimator ):
@@ -747,17 +716,39 @@ def fit(self, X, y=None):
747
716
)
748
717
749
718
random_state = check_random_state (self .random_state )
750
- self .labels_ = spectral_clustering (
719
+ n_components = (
720
+ self .n_clusters if self .n_components is None else self .n_components
721
+ )
722
+ # We now obtain the real valued solution matrix to the
723
+ # relaxed Ncut problem, solving the eigenvalue problem
724
+ # L_sym x = lambda x and recovering u = D^-1/2 x.
725
+ # The first eigenvector is constant only for fully connected graphs
726
+ # and should be kept for spectral clustering (drop_first = False)
727
+ # See spectral_embedding documentation.
728
+ maps = spectral_embedding (
751
729
self .affinity_matrix_ ,
752
- n_clusters = self .n_clusters ,
753
- n_components = self .n_components ,
730
+ n_components = n_components ,
754
731
eigen_solver = self .eigen_solver ,
755
732
random_state = random_state ,
756
- n_init = self .n_init ,
757
733
eigen_tol = self .eigen_tol ,
758
- assign_labels = self .assign_labels ,
759
- verbose = self .verbose ,
734
+ drop_first = False ,
760
735
)
736
+ if self .verbose :
737
+ print (f"Computing label assignment using { self .assign_labels } " )
738
+
739
+ if self .assign_labels == "kmeans" :
740
+ _ , self .labels_ , _ = k_means (
741
+ maps ,
742
+ self .n_clusters ,
743
+ random_state = random_state ,
744
+ n_init = self .n_init ,
745
+ verbose = self .verbose ,
746
+ )
747
+ elif self .assign_labels == "cluster_qr" :
748
+ self .labels_ = cluster_qr (maps )
749
+ else :
750
+ self .labels_ = discretize (maps , random_state = random_state )
751
+
761
752
return self
762
753
763
754
def fit_predict (self , X , y = None ):
0 commit comments