Commit 15d5a06
TST Improve tests for neighbor models with X=None (#30101)
Parent: 6eb2ef3
1 file changed: 50 additions and 12 deletions

sklearn/neighbors/tests/test_neighbors.py
@@ -2401,35 +2401,73 @@ def _weights(dist):
     "nn_model",
     [
         neighbors.KNeighborsClassifier(n_neighbors=10),
-        neighbors.RadiusNeighborsClassifier(radius=5.0),
+        neighbors.RadiusNeighborsClassifier(),
     ],
 )
-def test_neighbor_classifiers_loocv(nn_model):
-    """Check that `predict` and related functions work fine with X=None"""
-    X, y = datasets.make_blobs(n_samples=500, centers=5, n_features=2, random_state=0)
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+def test_neighbor_classifiers_loocv(nn_model, algorithm):
+    """Check that `predict` and related functions work fine with X=None
+
+    Calling predict with X=None computes a prediction for each training point
+    from the labels of its neighbors (without the label of the data point being
+    predicted upon). This is therefore mathematically equivalent to
+    leave-one-out cross-validation without having to do any retraining
+    (rebuilding a KD-tree or Ball-tree index) or any data reshuffling.
+    """
+    X, y = datasets.make_blobs(n_samples=15, centers=5, n_features=2, random_state=0)
+
+    nn_model = clone(nn_model).set_params(algorithm=algorithm)
+
+    # Set the radius for RadiusNeighborsClassifier to some percentile of the
+    # empirical pairwise distances to avoid trivial test cases and warnings for
+    # predictions with no neighbors within the radius.
+    if "radius" in nn_model.get_params():
+        dists = pairwise_distances(X).ravel()
+        dists = dists[dists > 0]
+        nn_model.set_params(radius=np.percentile(dists, 80))
 
     loocv = cross_val_score(nn_model, X, y, cv=LeaveOneOut())
     nn_model.fit(X, y)
 
-    assert np.all(loocv == (nn_model.predict(None) == y))
-    assert np.mean(loocv) == nn_model.score(None, y)
+    assert_allclose(loocv, nn_model.predict(None) == y)
+    assert np.mean(loocv) == pytest.approx(nn_model.score(None, y))
+
+    # Evaluating `nn_model` on its "training" set should lead to a higher
+    # accuracy value than leaving out each data point in turn because the
+    # former can overfit while the latter cannot by construction.
     assert nn_model.score(None, y) < nn_model.score(X, y)
 
 
 @pytest.mark.parametrize(
     "nn_model",
     [
         neighbors.KNeighborsRegressor(n_neighbors=10),
-        neighbors.RadiusNeighborsRegressor(radius=0.5),
+        neighbors.RadiusNeighborsRegressor(),
     ],
 )
-def test_neighbor_regressors_loocv(nn_model):
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+def test_neighbor_regressors_loocv(nn_model, algorithm):
     """Check that `predict` and related functions work fine with X=None"""
-    X, y = datasets.load_diabetes(return_X_y=True)
+    X, y = datasets.make_regression(n_samples=15, n_features=2, random_state=0)
 
     # Only checking cross_val_predict and not cross_val_score because
-    # cross_val_score does not work with LeaveOneOut() for a regressor
+    # cross_val_score does not work with LeaveOneOut() for a regressor: the
+    # default score method implements R2 score which is not well defined for a
+    # single data point.
+    #
+    # TODO: if score is refactored to evaluate models for other scoring
+    # functions, then this test can be extended to check cross_val_score as
+    # well.
+    nn_model = clone(nn_model).set_params(algorithm=algorithm)
+
+    # Set the radius for RadiusNeighborsRegressor to some percentile of the
+    # empirical pairwise distances to avoid trivial test cases and warnings for
+    # predictions with no neighbors within the radius.
+    if "radius" in nn_model.get_params():
+        dists = pairwise_distances(X).ravel()
+        dists = dists[dists > 0]
+        nn_model.set_params(radius=np.percentile(dists, 80))
+
     loocv = cross_val_predict(nn_model, X, y, cv=LeaveOneOut())
     nn_model.fit(X, y)
-
-    assert np.all(loocv == nn_model.predict(None))
+    assert_allclose(loocv, nn_model.predict(None))
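For context, below is a minimal sketch (not part of the commit) of the equivalence these tests exercise, assuming a scikit-learn version in which neighbor models accept X=None in predict and score. The dataset size, estimator choice, and the 80th-percentile radius mirror the test above but are otherwise illustrative.

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import LeaveOneOut, cross_val_score
from sklearn.neighbors import RadiusNeighborsClassifier

X, y = make_blobs(n_samples=15, centers=5, n_features=2, random_state=0)

# Choose a non-trivial radius from the empirical pairwise distances, as the
# test does, to avoid predictions with no neighbors within the radius.
dists = pairwise_distances(X).ravel()
radius = np.percentile(dists[dists > 0], 80)

model = RadiusNeighborsClassifier(radius=radius).fit(X, y)

# Explicit leave-one-out cross-validation: refits the estimator n_samples times.
loocv_accuracy = cross_val_score(model, X, y, cv=LeaveOneOut()).mean()

# With X=None, each training point is predicted from its neighbors while its
# own label is excluded, so no refitting or index rebuilding is needed.
loo_accuracy_via_none = model.score(None, y)

assert np.isclose(loocv_accuracy, loo_accuracy_via_none)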
