diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 3874e10c03b26..66772f23b172f 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -380,7 +380,9 @@ Changelog - |Feature| :func:`metrics.roc_auc_score` now supports micro-averaging (`average="micro"`) for the One-vs-Rest multiclass case (`multi_class="ovr"`). - :pr:`24338` by :user:`Arturo Amor `. + +- |Fix| :func:`metrics.ndcg_score` and :func:`metrics.dcg_score` now raise a `ValueError` if `y_true` is a single value. + :pr:`24482` by :user:`Madi Ebersole `. :mod:`sklearn.model_selection` .............................. diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 3fea49b1e285c..161021a60f036 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -1379,6 +1379,12 @@ def _tie_averaged_dcg(y_true, y_score, discount_cumsum): return (ranked * discount_sums).sum() +def _check_dcg_target_length(y_true): + """Check that y_true has the correct shape for DCG.""" + if len(y_true) == 1 and len(y_true[0]) == 1: + raise ValueError("Cannot compute DCG for a single sample.") + + def _check_dcg_target_type(y_true): y_type = type_of_target(y_true, input_name="y_true") supported_fmt = ( @@ -1492,6 +1498,7 @@ def dcg_score( y_true = check_array(y_true, ensure_2d=False) y_score = check_array(y_score, ensure_2d=False) check_consistent_length(y_true, y_score, sample_weight) + _check_dcg_target_length(y_true) _check_dcg_target_type(y_true) return np.average( _dcg_sample_scores( @@ -1541,6 +1548,7 @@ def _ndcg_sample_scores(y_true, y_score, k=None, ignore_ties=False): dcg_score : Discounted Cumulative Gain (not normalized). 
""" + gain = _dcg_sample_scores(y_true, y_score, k, ignore_ties=ignore_ties) # Here we use the order induced by y_true so we can ignore ties since # the gain associated to tied indices is the same (permuting ties doesn't @@ -1660,6 +1668,8 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False " raise a ValueError on negative y_true values starting from version 1.4.", FutureWarning, ) + # raise value error if y_true or y_score is single input + _check_dcg_target_length(y_true) _check_dcg_target_type(y_true) gain = _ndcg_sample_scores(y_true, y_score, k=k, ignore_ties=ignore_ties) return np.average(gain, weights=sample_weight) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 827c145ed01dc..0848d8c47853e 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -1746,6 +1746,14 @@ def _test_dcg_score_for(y_true, y_score): assert ideal == pytest.approx((np.sort(y_true)[:, ::-1] / discount).sum(axis=1)) +def _test_dcg_score_errors(): + # raise error if y_true is single value + y_true = np.array([1]) + y_score = np.array([1]) + with pytest.raises(ValueError, match="Cannot compute DCG for a single sample."): + dcg_score(y_true, y_score) + + def test_dcg_ties(): y_true = np.asarray([np.arange(5)]) y_score = np.zeros(y_true.shape) @@ -1859,6 +1867,14 @@ def _test_ndcg_score_for(y_true, y_score): assert score.shape == (y_true.shape[0],) +def _test_ndcg_score_errors(): + # raise error if y_true is single value + y_true = np.array([1]) + y_score = np.array([1]) + with pytest.raises(ValueError, match="Cannot compute DCG for a single sample."): + ndcg_score(y_true, y_score) + + def test_partial_roc_auc_score(): # Check `roc_auc_score` for max_fpr != `None` y_true = np.array([0, 0, 1, 1])