scikit-learn · glemaitre · Mar 24, 2023 · Feb 23, 2023 · Feb 23, 2023 · Feb 23, 2023
diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
@@ -301,6 +301,10 @@ Changelog
  both return `np.nan`.
  :pr:`25531` by :user:`Marc Torrellas Socastro <marctorsoc>`.

+- |Fix| :func:`metric.ndcg_score` now gives a meaningful error message for input of
+  length 1.
+  :pr:`25672` by :user:`Lene Preuss <lene>` and :user:`Wei-Chun Chu <wcchu>`.
+
 - |Enhancement| :class:`metrics.silhouette_samples` nows accepts a sparse
  matrix of pairwise distances between samples, or a feature array.
  :pr:`18723` by :user:`Sahil Gupta <sahilgupta2105>` and

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
@@ -1733,10 +1733,16 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False
    if y_true.min() < 0:
        # TODO(1.4): Replace warning w/ ValueError
        warnings.warn(
-            "ndcg_score should not be used on negative y_true values. ndcg_score will"
-            " raise a ValueError on negative y_true values starting from version 1.4.",
+            "ndcg_score should not be used on negative y_true values. ndcg_score"
+            " will raise a ValueError on negative y_true values starting from"
+            " version 1.4.",
            FutureWarning,
        )
+    if y_true.ndim > 1 and y_true.shape[1] <= 1:
+        raise ValueError(
+            "Computing NDCG is only meaningful when there is more than 1 document. "
+            f"Got {y_true.shape[1]} instead."
+        )
    _check_dcg_target_type(y_true)
    gain = _ndcg_sample_scores(y_true, y_score, k=k, ignore_ties=ignore_ties)
    return np.average(gain, weights=sample_weight)

diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
@@ -1535,7 +1535,6 @@ def test_lrap_error_raised():
 @pytest.mark.parametrize("n_classes", (2, 5, 10))
 @pytest.mark.parametrize("random_state", range(1))
 def test_alternative_lrap_implementation(n_samples, n_classes, random_state):
-
    check_alternative_lrap_implementation(
        label_ranking_average_precision_score, n_classes, n_samples, random_state
    )
@@ -1835,6 +1834,17 @@ def test_ndcg_toy_examples(ignore_ties):
    assert ndcg_score(y_true, y_score, ignore_ties=ignore_ties) == pytest.approx(1.0)


+def test_ndcg_error_single_document():
+    """Check that we raise an informative error message when trying to
+    compute NDCG with a single document."""
+    err_msg = (
+        "Computing NDCG is only meaningful when there is more than 1 document. "
+        "Got 1 instead."
+    )
+    with pytest.raises(ValueError, match=err_msg):
+        ndcg_score([[1]], [[1]])
+
+
 def test_ndcg_score():
    _, y_true = make_multilabel_classification(random_state=0, n_classes=10)
    y_score = -y_true + 1