@@ -35,8 +35,11 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
35
35
``metric='precomputed'``.
36
36
37
37
eps : float, optional
38
- The maximum distance between two samples for them to be considered
39
- as in the same neighborhood.
38
+ The maximum distance between two samples for one to be considered
39
+ in the neighborhood of the other. This is not a maximum bound
40
+ on the distances of points within a cluster. This is the most
41
+ important DBSCAN parameter to choose appropriately for your data set
42
+ and distance function.
40
43
41
44
min_samples : int, optional
42
45
The number of samples (or total weight) in a neighborhood for a point
@@ -128,6 +131,10 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski', metric_params=None,
128
131
Algorithm for Discovering Clusters in Large Spatial Databases with Noise".
129
132
In: Proceedings of the 2nd International Conference on Knowledge Discovery
130
133
and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996
134
+
135
+ Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).
136
+ "DBSCAN revisited, revisited: why and how you should (still) use DBSCAN".
137
+ ACM Transactions on Database Systems (TODS), 42(3), 19.
131
138
"""
132
139
if not eps > 0.0 :
133
140
raise ValueError ("eps must be positive." )
@@ -195,8 +202,11 @@ class DBSCAN(BaseEstimator, ClusterMixin):
195
202
Parameters
196
203
----------
197
204
eps : float, optional
198
- The maximum distance between two samples for them to be considered
199
- as in the same neighborhood.
205
+ The maximum distance between two samples for one to be considered
206
+ in the neighborhood of the other. This is not a maximum bound
207
+ on the distances of points within a cluster. This is the most
208
+ important DBSCAN parameter to choose appropriately for your data set
209
+ and distance function.
200
210
201
211
min_samples : int, optional
202
212
The number of samples (or total weight) in a neighborhood for a point
@@ -300,6 +310,10 @@ class DBSCAN(BaseEstimator, ClusterMixin):
300
310
Algorithm for Discovering Clusters in Large Spatial Databases with Noise".
301
311
In: Proceedings of the 2nd International Conference on Knowledge Discovery
302
312
and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996
313
+
314
+ Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).
315
+ "DBSCAN revisited, revisited: why and how you should (still) use DBSCAN".
316
+ ACM Transactions on Database Systems (TODS), 42(3), 19.
303
317
"""
304
318
305
319
def __init__ (self , eps = 0.5 , min_samples = 5 , metric = 'euclidean' ,
0 commit comments