diff --git a/sklearn/metrics/ranking.py b/sklearn/metrics/ranking.py
index aa2e5425976e9..6d023cd7b9b37 100644
--- a/sklearn/metrics/ranking.py
+++ b/sklearn/metrics/ranking.py
@@ -23,6 +23,7 @@
 import numpy as np
 from scipy.sparse import csr_matrix
 from scipy.stats import rankdata
+from functools import partial
 
 from ..utils import assert_all_finite
 from ..utils import check_consistent_length
@@ -188,8 +189,8 @@ def _binary_uninterpolated_average_precision(
         sample_weight=sample_weight)
 
-
-def roc_auc_score(y_true, y_score, average="macro", sample_weight=None):
+def roc_auc_score(y_true, y_score, average="macro", sample_weight=None,
+                  pos_label=None):
     """Compute Area Under the Curve (AUC) from prediction scores
 
     Note: this implementation is restricted to the binary classification task
@@ -226,6 +227,12 @@ def roc_auc_score(y_true, y_score, average="macro", sample_weight=None):
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.
 
+    pos_label : int or str, optional
+        The label of the positive class. Default is ``None``, which is
+        treated as 1.
+
+        For multilabel-indicator y_true, pos_label is fixed to 1.
+
     Returns
     -------
     auc : float
@@ -251,18 +258,33 @@
     0.75
 
     """
-    def _binary_roc_auc_score(y_true, y_score, sample_weight=None):
+    def _binary_roc_auc_score(y_true, y_score, sample_weight=None,
+                              pos_label=None):
         if len(np.unique(y_true)) != 2:
             raise ValueError("Only one class present in y_true. ROC AUC score "
                              "is not defined in that case.")
 
         fpr, tpr, tresholds = roc_curve(y_true, y_score,
+                                        pos_label=pos_label,
                                         sample_weight=sample_weight)
         return auc(fpr, tpr, reorder=True)
 
-    return _average_binary_score(
-        _binary_roc_auc_score, y_true, y_score, average,
-        sample_weight=sample_weight)
+    y_type = type_of_target(y_true)
+    if y_type == "binary":
+        _partial_binary_roc_auc_score = partial(_binary_roc_auc_score,
+                                                pos_label=pos_label)
+        return _average_binary_score(
+            _partial_binary_roc_auc_score, y_true, y_score, average,
+            sample_weight=sample_weight)
+    else:
+        if pos_label is not None and pos_label != 1:
+            raise ValueError("Parameter pos_label is fixed to 1 for "
+                             "multilabel-indicator y_true. Do not set "
+                             "pos_label, or set it to either None "
+                             "or 1.")
+        return _average_binary_score(
+            _binary_roc_auc_score, y_true, y_score, average,
+            sample_weight=sample_weight)
 
 
 def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None):
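The new pos_label argument only takes effect on the binary path: it is bound into _binary_roc_auc_score via functools.partial so that _average_binary_score itself stays unchanged. A minimal usage sketch of the patched function, assuming this branch is installed; the "spam"/"ham" labels and the scores are invented for illustration:

```python
import numpy as np
from sklearn.metrics import roc_auc_score

# Binary y_true with string labels: neither label is the conventional 1,
# so the positive class must be named explicitly via pos_label.
y_true = np.array(["ham", "spam", "spam", "ham"])
y_score = np.array([0.1, 0.9, 0.8, 0.3])  # scores for the "spam" class

print(roc_auc_score(y_true, y_score, pos_label="spam"))  # 1.0
# Swapping the positive class while keeping the same scores yields the
# complement of the AUC:
print(roc_auc_score(y_true, y_score, pos_label="ham"))   # 0.0
```
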
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 5f775aaf9ac8f..0eb7873be92d3 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -257,6 +257,7 @@
     "macro_f0.5_score", "macro_f1_score", "macro_f2_score",
     "macro_precision_score", "macro_recall_score",
+    "roc_auc_score",
 ]
 
 # Metrics with a "labels" argument
@@ -595,7 +596,7 @@ def test_invariance_string_vs_numbers_labels():
     for name, metric in THRESHOLDED_METRICS.items():
         if name in ("log_loss", "hinge_loss", "unnormalized_log_loss",
-                    "brier_score_loss"):
+                    "brier_score_loss", "roc_auc_score"):
             # Ugly, but handle case with a pos_label and label
             metric_str = metric
             if name in METRICS_WITH_POS_LABEL:
@@ -607,7 +608,7 @@ def test_invariance_string_vs_numbers_labels():
                             err_msg="{0} failed string vs number "
                                     "invariance test".format(name))
 
-            measure_with_strobj = metric(y1_str.astype('O'), y2)
+            measure_with_strobj = metric_str(y1_str.astype('O'), y2)
             assert_array_equal(measure_with_number, measure_with_strobj,
                                err_msg="{0} failed string object vs number "
                                        "invariance test".format(name))
diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index db80691663606..277846bac8b3f 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -371,6 +371,19 @@ def test_roc_curve_drop_intermediate():
         [1.0, 0.9, 0.7, 0.6, 0.])
 
 
+def test_roc_auc_score_pos_label_multilabel_indicator():
+    # For multilabel-indicator y_true, pos_label=None and pos_label=1
+    # are accepted; any other pos_label must raise a ValueError
+    y_true = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
+    y_pred = np.array([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8]])
+    roc_auc_score_1 = roc_auc_score(y_true, y_pred, pos_label=None)
+    assert_almost_equal(roc_auc_score_1, 0.75)
+    roc_auc_score_2 = roc_auc_score(y_true, y_pred, pos_label=1)
+    assert_almost_equal(roc_auc_score_2, 0.75)
+    assert_raises(ValueError, roc_auc_score, y_true, y_pred,
+                  pos_label=0)
+
+
 def test_auc():
     # Test Area Under Curve (AUC) computation
     x = [0, 1]
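For multilabel-indicator y_true the score remains the macro-average of one binary AUC per column with pos_label fixed to 1, which the new test pins to 0.75. A quick sketch of that equivalence using the same arrays as the test; the explicit per-column loop is illustrative only, not how _average_binary_score is implemented:

```python
import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
y_pred = np.array([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8]])

# One binary AUC per column; each column treats 1 as the positive label.
per_column = [roc_auc_score(y_true[:, i], y_pred[:, i])
              for i in range(y_true.shape[1])]

print(per_column)                     # [0.75, 0.75]
print(np.mean(per_column))            # 0.75
print(roc_auc_score(y_true, y_pred))  # 0.75, the same macro average
```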