From 1bc6189401cd9d56016747e45e3947998d262c4d Mon Sep 17 00:00:00 2001
From: Omar Salman
Date: Tue, 7 May 2024 16:40:56 +0500
Subject: [PATCH 1/2] DOC updates for D2 log loss

---
 sklearn/metrics/_classification.py           | 4 ++--
 sklearn/metrics/tests/test_classification.py | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 04894a4d7a7e7..2580a647a7b74 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -3278,9 +3278,9 @@ def d2_log_loss_score(y_true, y_pred, *, sample_weight=None, labels=None):
 
     Best possible score is 1.0 and it can be negative (because the model can be
     arbitrarily worse). A model that always uses the empirical mean of `y_true` as
-    constant prediction, disregarding the input features, gets a D^2 score of 0.0.
+    a constant prediction, disregarding the input features, gets a D^2 score of 0.0.
 
-    Read more in the :ref:`User Guide `.
+    Read more in the :ref:`User Guide `.
 
     .. versionadded:: 1.5
 
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 40b762bfa7308..b87e76ba2fb42 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -3048,7 +3048,8 @@ def test_d2_log_loss_score():
 
 
 def test_d2_log_loss_score_raises():
-    """Test that d2_log_loss raises error on invalid input."""
+    """Test that d2_log_loss_score raises the appropriate errors on
+    invalid inputs."""
     y_true = [0, 1, 2]
     y_pred = [[0.2, 0.8], [0.5, 0.5], [0.4, 0.6]]
     err = "contain different number of classes"

From 38f953a3007887f18a593c306e301c763a3b297b Mon Sep 17 00:00:00 2001
From: Omar Salman
Date: Tue, 7 May 2024 16:57:08 +0500
Subject: [PATCH 2/2] Correct the definition in the docstring

---
 sklearn/metrics/_classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 2580a647a7b74..b68f1593e317e 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -3277,8 +3277,8 @@ def d2_log_loss_score(y_true, y_pred, *, sample_weight=None, labels=None):
     :math:`D^2` score function, fraction of log loss explained.
 
     Best possible score is 1.0 and it can be negative (because the model can be
-    arbitrarily worse). A model that always uses the empirical mean of `y_true` as
-    a constant prediction, disregarding the input features, gets a D^2 score of 0.0.
+    arbitrarily worse). A model that always predicts the per-class proportions
+    of `y_true`, disregarding the input features, gets a D^2 score of 0.0.
 
     Read more in the :ref:`User Guide `.
 
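
Note (not part of the patch): a minimal sketch illustrating the corrected baseline definition above. A model that always predicts the per-class proportions of `y_true`, ignoring the input features, gets a D^2 score of 0.0, while sharper, mostly correct predictions score higher. The labels and probabilities below are made up for illustration; only `d2_log_loss_score` itself comes from the patched module, and scikit-learn >= 1.5 is assumed.

import numpy as np
from sklearn.metrics import d2_log_loss_score

# Imbalanced binary labels: one negative, three positives.
y_true = [0, 1, 1, 1]

# Baseline "null" model: predict the per-class proportions of y_true
# ([0.25, 0.75] here) for every sample, disregarding the input features.
proportions = np.bincount(y_true) / len(y_true)
y_pred_null = np.tile(proportions, (len(y_true), 1))
print(d2_log_loss_score(y_true, y_pred_null))  # 0.0: the baseline itself

# A model with sharper, mostly correct probabilities beats the baseline.
y_pred_better = [[0.9, 0.1], [0.1, 0.9], [0.2, 0.8], [0.1, 0.9]]
print(d2_log_loss_score(y_true, y_pred_better))  # roughly 0.76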