lesteve
diff --git a/‎doc/whats_new/upcoming_changes/sklearn.neighbors/30047.enhancement.rst
Copy file name to clipboard
+6Lines changed: 6 additions & 0 deletions b/‎doc/whats_new/upcoming_changes/sklearn.neighbors/30047.enhancement.rst
Copy file name to clipboard
+6Lines changed: 6 additions & 0 deletions
diff --git a/‎sklearn/neighbors/_classification.py
Copy file name to clipboardExpand all lines: sklearn/neighbors/_classification.py
+85-11Lines changed: 85 additions & 11 deletions b/‎sklearn/neighbors/_classification.py
Copy file name to clipboardExpand all lines: sklearn/neighbors/_classification.py
+85-11Lines changed: 85 additions & 11 deletions
diff --git a/‎sklearn/neighbors/_regression.py
Copy file name to clipboardExpand all lines: sklearn/neighbors/_regression.py
+8-4Lines changed: 8 additions & 4 deletions b/‎sklearn/neighbors/_regression.py
Copy file name to clipboardExpand all lines: sklearn/neighbors/_regression.py
+8-4Lines changed: 8 additions & 4 deletions
diff --git a/‎sklearn/neighbors/tests/test_neighbors.py
Copy file name to clipboardExpand all lines: sklearn/neighbors/tests/test_neighbors.py
+44-1Lines changed: 44 additions & 1 deletion b/‎sklearn/neighbors/tests/test_neighbors.py
Copy file name to clipboardExpand all lines: sklearn/neighbors/tests/test_neighbors.py
+44-1Lines changed: 44 additions & 1 deletion
@@ -0,0 +1,6 @@
+- Make `predict`, `predict_proba`, and `score` of
+  :class:`neighbors.KNeighborsClassifier` and
+  :class:`neighbors.RadiusNeighborsClassifier` accept `X=None` as input. In this case,
+  predictions for all training set points are returned, and points are not included
+  among their own neighbors.
+  :pr:`30047` by :user:`Dmitry Kobak <dkobak>`.
@@ -244,8 +244,10 @@ def predict(self, X):
         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_queries, n_features), \
-                or (n_queries, n_indexed) if metric == 'precomputed'
-            Test samples.
+                or (n_queries, n_indexed) if metric == 'precomputed', or None
+            Test samples. If `None`, predictions for all indexed points are
+            returned; in this case, points are not considered their own
+            neighbors.
 
         Returns
         -------
@@ -281,7 +283,7 @@ def predict(self, X):
             classes_ = [self.classes_]
 
         n_outputs = len(classes_)
-        n_queries = _num_samples(X)
+        n_queries = _num_samples(self._fit_X if X is None else X)
         weights = _get_weights(neigh_dist, self.weights)
         if weights is not None and _all_with_any_reduction_axis_1(weights, value=0):
             raise ValueError(
@@ -311,8 +313,10 @@ def predict_proba(self, X):
         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_queries, n_features), \
-                or (n_queries, n_indexed) if metric == 'precomputed'
-            Test samples.
+                or (n_queries, n_indexed) if metric == 'precomputed', or None
+            Test samples. If `None`, predictions for all indexed points are
+            returned; in this case, points are not considered their own
+            neighbors.
 
         Returns
         -------
@@ -375,7 +379,7 @@ def predict_proba(self, X):
             _y = self._y.reshape((-1, 1))
             classes_ = [self.classes_]
 
-        n_queries = _num_samples(X)
+        n_queries = _num_samples(self._fit_X if X is None else X)
 
         weights = _get_weights(neigh_dist, self.weights)
         if weights is None:
@@ -408,6 +412,39 @@ def predict_proba(self, X):
 
         return probabilities
 
+    # This function is defined here only to modify the parent docstring
+    # and add information about X=None
+    def score(self, X, y, sample_weight=None):
+        """
+        Return the mean accuracy on the given test data and labels.
+
+        In multi-label classification, this is the subset accuracy
+        which is a harsh metric since you require for each sample that
+        each label set be correctly predicted.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features), or None
+            Test samples. If `None`, predictions for all indexed points are
+            used; in this case, points are not considered their own
+            neighbors. This means that `knn.fit(X, y).score(None, y)`
+            implicitly performs a leave-one-out cross-validation procedure
+            and is equivalent to `cross_val_score(knn, X, y, cv=LeaveOneOut()).mean()`
+            but typically much faster.
+
+        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+            True labels for `X`, or for the indexed points if `X` is `None`.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights.
+
+        Returns
+        -------
+        score : float
+            Mean accuracy of ``self.predict(X)`` w.r.t. `y`.
+        """
+        return super().score(X, y, sample_weight)
+
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.classifier_tags.multi_label = True
@@ -692,8 +729,10 @@ def predict(self, X):
         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_queries, n_features), \
-                or (n_queries, n_indexed) if metric == 'precomputed'
-            Test samples.
+                or (n_queries, n_indexed) if metric == 'precomputed', or None
+            Test samples. If `None`, predictions for all indexed points are
+            returned; in this case, points are not considered their own
+            neighbors.
 
         Returns
         -------
@@ -734,8 +773,10 @@ def predict_proba(self, X):
         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_queries, n_features), \
-                or (n_queries, n_indexed) if metric == 'precomputed'
-            Test samples.
+                or (n_queries, n_indexed) if metric == 'precomputed', or None
+            Test samples. If `None`, predictions for all indexed points are
+            returned; in this case, points are not considered their own
+            neighbors.
 
         Returns
         -------
@@ -745,7 +786,7 @@ def predict_proba(self, X):
             by lexicographic order.
         """
         check_is_fitted(self, "_fit_method")
-        n_queries = _num_samples(X)
+        n_queries = _num_samples(self._fit_X if X is None else X)
 
         metric, metric_kwargs = _adjusted_metric(
             metric=self.metric, metric_kwargs=self.metric_params, p=self.p
@@ -846,6 +887,39 @@ def predict_proba(self, X):
 
         return probabilities
 
+    # This function is defined here only to modify the parent docstring
+    # and add information about X=None
+    def score(self, X, y, sample_weight=None):
+        """
+        Return the mean accuracy on the given test data and labels.
+
+        In multi-label classification, this is the subset accuracy
+        which is a harsh metric since you require for each sample that
+        each label set be correctly predicted.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features), or None
+            Test samples. If `None`, predictions for all indexed points are
+            used; in this case, points are not considered their own
+            neighbors. This means that `knn.fit(X, y).score(None, y)`
+            implicitly performs a leave-one-out cross-validation procedure
+            and is equivalent to `cross_val_score(knn, X, y, cv=LeaveOneOut()).mean()`
+            but typically much faster.
+
+        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+            True labels for `X`, or for the indexed points if `X` is `None`.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights.
+
+        Returns
+        -------
+        score : float
+            Mean accuracy of ``self.predict(X)`` w.r.t. `y`.
+        """
+        return super().score(X, y, sample_weight)
+
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.classifier_tags.multi_label = True
 
@@ -234,8 +234,10 @@ def predict(self, X):
         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_queries, n_features), \
-                or (n_queries, n_indexed) if metric == 'precomputed'
-            Test samples.
+                or (n_queries, n_indexed) if metric == 'precomputed', or None
+            Test samples. If `None`, predictions for all indexed points are
+            returned; in this case, points are not considered their own
+            neighbors.
 
         Returns
         -------
@@ -464,8 +466,10 @@ def predict(self, X):
         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_queries, n_features), \
-                or (n_queries, n_indexed) if metric == 'precomputed'
-            Test samples.
+                or (n_queries, n_indexed) if metric == 'precomputed', or None
+            Test samples. If `None`, predictions for all indexed points are
+            returned; in this case, points are not considered their own
+            neighbors.
 
         Returns
         -------
 
@@ -24,7 +24,12 @@
     assert_compatible_argkmin_results,
     assert_compatible_radius_results,
 )
-from sklearn.model_selection import cross_val_score, train_test_split
+from sklearn.model_selection import (
+    LeaveOneOut,
+    cross_val_predict,
+    cross_val_score,
+    train_test_split,
+)
 from sklearn.neighbors import (
     VALID_METRICS_SPARSE,
     KNeighborsRegressor,
@@ -2390,3 +2395,41 @@ def _weights(dist):
 
     with pytest.raises(ValueError, match=msg):
         est.predict_proba([[1.1, 1.1]])
+
+
+@pytest.mark.parametrize(
+    "nn_model",
+    [
+        neighbors.KNeighborsClassifier(n_neighbors=10),
+        neighbors.RadiusNeighborsClassifier(radius=5.0),
+    ],
+)
+def test_neighbor_classifiers_loocv(nn_model):
+    """Check that `predict` and `score` with X=None match leave-one-out CV."""
+    X, y = datasets.make_blobs(n_samples=500, centers=5, n_features=2, random_state=0)
+
+    loocv = cross_val_score(nn_model, X, y, cv=LeaveOneOut())
+    nn_model.fit(X, y)
+
+    assert np.all(loocv == (nn_model.predict(None) == y))
+    assert np.mean(loocv) == pytest.approx(nn_model.score(None, y))
+    assert nn_model.score(None, y) < nn_model.score(X, y)
+
+
+@pytest.mark.parametrize(
+    "nn_model",
+    [
+        neighbors.KNeighborsRegressor(n_neighbors=10),
+        neighbors.RadiusNeighborsRegressor(radius=0.5),
+    ],
+)
+def test_neighbor_regressors_loocv(nn_model):
+    """Check that `predict` with X=None matches leave-one-out CV predictions."""
+    X, y = datasets.load_diabetes(return_X_y=True)
+
+    # Only checking cross_val_predict and not cross_val_score because
+    # cross_val_score does not work with LeaveOneOut() for a regressor
+    loocv = cross_val_predict(nn_model, X, y, cv=LeaveOneOut())
+    nn_model.fit(X, y)
+
+    np.testing.assert_allclose(loocv, nn_model.predict(None))