diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py
index 857a332cc2371..8d6d609811458 100644
--- a/sklearn/cluster/_dbscan.py
+++ b/sklearn/cluster/_dbscan.py
@@ -5,7 +5,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-import warnings
 from numbers import Integral, Real
 
 import numpy as np
@@ -399,14 +398,6 @@ def fit(self, X, y=None, sample_weight=None):
         # Calculate neighborhood for all samples. This leaves the original
         # point in, which needs to be considered later (i.e. point i is in the
         # neighborhood of point i. While True, its useless information)
-        if self.metric == "precomputed" and sparse.issparse(X):
-            # set the diagonal to explicit values, as a point is its own
-            # neighbor
-            X = X.copy()  # copy to avoid in-place modification
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore", sparse.SparseEfficiencyWarning)
-                X.setdiag(X.diagonal())
-
         neighbors_model = NearestNeighbors(
             radius=self.eps,
             algorithm=self.algorithm,
@@ -420,6 +411,13 @@ def fit(self, X, y=None, sample_weight=None):
         # This has worst case O(n^2) memory complexity
         neighborhoods = neighbors_model.radius_neighbors(X, return_distance=False)
 
+        # Each point is its own neighbor, so update the neighborhoods
+        # accordingly after the initial fitting
+        if self.metric == "precomputed" and sparse.issparse(X):
+            for i, neighborhood in enumerate(neighborhoods):
+                if i not in neighborhoods[i]:
+                    neighborhoods[i] = np.append(neighborhood, i)
+
         if sample_weight is None:
             n_neighbors = np.array([len(neighbors) for neighbors in neighborhoods])
         else: