TST: Fix assert raises in sklearn/metrics/tests/ #14715

Merged (14 commits, Sep 8, 2019)
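
This PR replaces the `assert_raises` and `assert_raise_message` helpers from `sklearn.utils.testing` with `pytest.raises` context managers throughout `sklearn/metrics/tests/`. A minimal sketch of the pattern, using a hypothetical `some_metric` function rather than anything from the changed file:

import pytest


def some_metric(y_true, y_pred):
    # Stand-in for a metric that rejects unsupported input.
    raise ValueError("multiclass format is not supported")


def test_some_metric_raises():
    y_true, y_pred = [0, 1, 2], [0, 2, 1]

    # Old style, removed by this PR:
    #   assert_raises(ValueError, some_metric, y_true, y_pred)
    #   assert_raise_message(ValueError, "multiclass format is not supported",
    #                        some_metric, y_true, y_pred)

    # New style: check only the exception type...
    with pytest.raises(ValueError):
        some_metric(y_true, y_pred)

    # ...or also match the message (interpreted as a regular expression).
    with pytest.raises(ValueError, match="multiclass format is not supported"):
        some_metric(y_true, y_pred)

The diff below applies this pattern throughout the file.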
217 changes: 114 additions & 103 deletions in sklearn/metrics/tests/test_classification.py
@@ -2,6 +2,7 @@
from functools import partial
from itertools import product
import warnings
import re

import numpy as np
from scipy import linalg
@@ -13,8 +14,6 @@
from sklearn.datasets import make_multilabel_classification
from sklearn.preprocessing import label_binarize, LabelBinarizer
from sklearn.utils.validation import check_random_state
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_raise_message
from sklearn.utils.testing import assert_almost_equal
from sklearn.utils.testing import assert_array_equal
from sklearn.utils.testing import assert_array_almost_equal
@@ -249,10 +248,12 @@ def test_precision_recall_f_extra_labels():
# Error when introducing invalid label in multilabel case
# (although it would only affect performance if average='macro'/None)
for average in [None, 'macro', 'micro', 'samples']:
assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
labels=np.arange(6), average=average)
assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
labels=np.arange(-1, 4), average=average)
with pytest.raises(ValueError):
recall_score(y_true_bin, y_pred_bin, labels=np.arange(6),
average=average)
with pytest.raises(ValueError):
recall_score(y_true_bin, y_pred_bin, labels=np.arange(-1, 4),
average=average)

# tests non-regression on issue #10307
y_true = np.array([[0, 1, 1], [1, 0, 0]])
@@ -297,8 +298,9 @@ def test_average_precision_score_score_non_binary_class():

# y_true contains three different class values
y_true = rng.randint(0, 3, size=10)
assert_raise_message(ValueError, "multiclass format is not supported",
average_precision_score, y_true, y_pred)
err_msg = "multiclass format is not supported"
with pytest.raises(ValueError, match=err_msg):
average_precision_score(y_true, y_pred)


def test_average_precision_score_duplicate_values():
@@ -330,16 +332,19 @@ def test_precision_recall_fscore_support_errors():
y_true, y_pred, _ = make_prediction(binary=True)

# Bad beta
assert_raises(ValueError, precision_recall_fscore_support,
y_true, y_pred, beta=-0.1)
with pytest.raises(ValueError):
precision_recall_fscore_support(y_true, y_pred, beta=-0.1)

# Bad pos_label
assert_raises(ValueError, precision_recall_fscore_support,
y_true, y_pred, pos_label=2, average='binary')
with pytest.raises(ValueError):
precision_recall_fscore_support(y_true, y_pred,
pos_label=2,
average='binary')

# Bad average option
assert_raises(ValueError, precision_recall_fscore_support,
[0, 1, 2], [1, 2, 0], average='mega')
with pytest.raises(ValueError):
precision_recall_fscore_support([0, 1, 2], [1, 2, 0],
average='mega')


def test_precision_recall_f_unused_pos_label():
@@ -477,34 +482,31 @@ def test_multilabel_confusion_matrix_errors():
y_pred = np.array([[1, 0, 0], [0, 1, 1], [0, 0, 1]])

# Bad sample_weight
assert_raise_message(ValueError, "inconsistent numbers of samples",
multilabel_confusion_matrix,
y_true, y_pred, sample_weight=[1, 2])
assert_raise_message(ValueError, "bad input shape",
multilabel_confusion_matrix,
y_true, y_pred,
sample_weight=[[1, 2, 3],
[2, 3, 4],
[3, 4, 5]])
with pytest.raises(ValueError, match="inconsistent numbers of samples"):
multilabel_confusion_matrix(y_true, y_pred, sample_weight=[1, 2])
with pytest.raises(ValueError, match="bad input shape"):
multilabel_confusion_matrix(y_true, y_pred,
sample_weight=[[1, 2, 3],
[2, 3, 4],
[3, 4, 5]])

# Bad labels
assert_raise_message(ValueError, "All labels must be in [0, n labels)",
multilabel_confusion_matrix,
y_true, y_pred, labels=[-1])
assert_raise_message(ValueError, "All labels must be in [0, n labels)",
multilabel_confusion_matrix,
y_true, y_pred, labels=[3])
err_msg = r"All labels must be in \[0, n labels\)"
with pytest.raises(ValueError, match=err_msg):
multilabel_confusion_matrix(y_true, y_pred, labels=[-1])
err_msg = r"All labels must be in \[0, n labels\)"
with pytest.raises(ValueError, match=err_msg):
multilabel_confusion_matrix(y_true, y_pred, labels=[3])

# Using samplewise outside multilabel
assert_raise_message(ValueError, "Samplewise metrics",
multilabel_confusion_matrix,
[0, 1, 2], [1, 2, 0], samplewise=True)
with pytest.raises(ValueError, match="Samplewise metrics"):
multilabel_confusion_matrix([0, 1, 2], [1, 2, 0], samplewise=True)

# Bad y_type
assert_raise_message(ValueError, "multiclass-multioutput is not supported",
multilabel_confusion_matrix,
[[0, 1, 2], [2, 1, 0]],
[[1, 2, 0], [1, 0, 2]])
err_msg = "multiclass-multioutput is not supported"
with pytest.raises(ValueError, match=err_msg):
multilabel_confusion_matrix([[0, 1, 2], [2, 1, 0]],
[[1, 2, 0], [1, 0, 2]])


def test_cohen_kappa():
@@ -621,8 +623,9 @@ def test_matthews_corrcoef():
mask = [1] * 10 + [0] * 10
# Now only the first half of the vector elements are given a weight of 1
# and hence the mcc will not be a perfect 0 as in the previous case
assert_raises(AssertionError, assert_almost_equal,
matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.)
with pytest.raises(AssertionError):
assert_almost_equal(matthews_corrcoef(y_1, y_2,
sample_weight=mask), 0.)


def test_matthews_corrcoef_multiclass():
@@ -754,12 +757,14 @@ def test_precision_recall_f1_score_multiclass():
fs = f1_score(y_true, y_pred, average='weighted')
assert_array_almost_equal(fs, 0.47, 2)

assert_raises(ValueError, precision_score, y_true, y_pred,
average="samples")
assert_raises(ValueError, recall_score, y_true, y_pred, average="samples")
assert_raises(ValueError, f1_score, y_true, y_pred, average="samples")
assert_raises(ValueError, fbeta_score, y_true, y_pred, average="samples",
beta=0.5)
with pytest.raises(ValueError):
precision_score(y_true, y_pred, average="samples")
with pytest.raises(ValueError):
recall_score(y_true, y_pred, average="samples")
with pytest.raises(ValueError):
f1_score(y_true, y_pred, average="samples")
with pytest.raises(ValueError):
fbeta_score(y_true, y_pred, average="samples", beta=0.5)

# same prediction but with an explicit label ordering
p, r, f, s = precision_recall_fscore_support(
Expand Down Expand Up @@ -847,8 +852,9 @@ def test_confusion_matrix_multiclass_subset_labels():
[0, 0]])

# check for exception when none of the specified labels are in y_true
assert_raises(ValueError, confusion_matrix, y_true, y_pred,
labels=[extra_label, extra_label + 1])
with pytest.raises(ValueError):
confusion_matrix(y_true, y_pred,
labels=[extra_label, extra_label + 1])


def test_confusion_matrix_dtype():
Expand Down Expand Up @@ -1063,12 +1069,11 @@ def test_classification_report_no_labels_target_names_unequal_length():
y_pred = [0, 2, 2, 0, 0]
target_names = ['class 0', 'class 1', 'class 2']

assert_raise_message(ValueError,
"Number of classes, 2, does not "
"match size of target_names, 3. "
"Try specifying the labels parameter",
classification_report,
y_true, y_pred, target_names=target_names)
err_msg = ("Number of classes, 2, does not "
"match size of target_names, 3. "
"Try specifying the labels parameter")
with pytest.raises(ValueError, match=err_msg):
classification_report(y_true, y_pred, target_names=target_names)


@ignore_warnings
Expand Down Expand Up @@ -1146,29 +1151,29 @@ def test_multilabel_hamming_loss():
def test_jaccard_score_validation():
y_true = np.array([0, 1, 0, 1, 1])
y_pred = np.array([0, 1, 0, 1, 1])
assert_raise_message(ValueError, "pos_label=2 is not a valid label: "
"array([0, 1])", jaccard_score, y_true,
y_pred, average='binary', pos_label=2)
err_msg = r"pos_label=2 is not a valid label: array\(\[0, 1\]\)"
with pytest.raises(ValueError, match=err_msg):
jaccard_score(y_true, y_pred, average='binary', pos_label=2)

y_true = np.array([[0, 1, 1], [1, 0, 0]])
y_pred = np.array([[1, 1, 1], [1, 0, 1]])
msg1 = ("Target is multilabel-indicator but average='binary'. "
"Please choose another average setting, one of [None, "
"'micro', 'macro', 'weighted', 'samples'].")
assert_raise_message(ValueError, msg1, jaccard_score, y_true,
y_pred, average='binary', pos_label=-1)
msg1 = (r"Target is multilabel-indicator but average='binary'. "
r"Please choose another average setting, one of \[None, "
r"'micro', 'macro', 'weighted', 'samples'\].")
with pytest.raises(ValueError, match=msg1):
jaccard_score(y_true, y_pred, average='binary', pos_label=-1)

y_true = np.array([0, 1, 1, 0, 2])
y_pred = np.array([1, 1, 1, 1, 0])
msg2 = ("Target is multiclass but average='binary'. Please choose "
"another average setting, one of [None, 'micro', 'macro', "
"'weighted'].")
assert_raise_message(ValueError, msg2, jaccard_score, y_true,
y_pred, average='binary')
msg2 = (r"Target is multiclass but average='binary'. Please choose "
r"another average setting, one of \[None, 'micro', 'macro', "
r"'weighted'\].")
with pytest.raises(ValueError, match=msg2):
jaccard_score(y_true, y_pred, average='binary')
msg3 = ("Samplewise metrics are not available outside of multilabel "
"classification.")
assert_raise_message(ValueError, msg3, jaccard_score, y_true,
y_pred, average='samples')
with pytest.raises(ValueError, match=msg3):
jaccard_score(y_true, y_pred, average='samples')

assert_warns_message(UserWarning,
"Note that pos_label (set to 3) is ignored when "
Expand Down Expand Up @@ -1224,11 +1229,11 @@ def test_multilabel_jaccard_score(recwarn):
average='weighted'), 7. / 8)

msg2 = 'Got 4 > 2'
assert_raise_message(ValueError, msg2, jaccard_score, y_true,
y_pred, labels=[4], average='macro')
with pytest.raises(ValueError, match=msg2):
jaccard_score(y_true, y_pred, labels=[4], average='macro')
msg3 = 'Got -1 < 0'
assert_raise_message(ValueError, msg3, jaccard_score, y_true,
y_pred, labels=[-1], average='macro')
with pytest.raises(ValueError, match=msg3):
jaccard_score(y_true, y_pred, labels=[-1], average='macro')

msg = ('Jaccard is ill-defined and being set to 0.0 in labels '
'with no true or predicted samples.')
Expand Down Expand Up @@ -1680,23 +1685,23 @@ def test_prf_average_binary_data_non_binary():
# Error if user does not explicitly set non-binary average mode
y_true_mc = [1, 2, 3, 3]
y_pred_mc = [1, 2, 3, 1]
msg_mc = ("Target is multiclass but average='binary'. Please "
"choose another average setting, one of ["
"None, 'micro', 'macro', 'weighted'].")
msg_mc = (r"Target is multiclass but average='binary'. Please "
r"choose another average setting, one of \["
r"None, 'micro', 'macro', 'weighted'\].")
y_true_ind = np.array([[0, 1, 1], [1, 0, 0], [0, 0, 1]])
y_pred_ind = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])
msg_ind = ("Target is multilabel-indicator but average='binary'. Please "
"choose another average setting, one of ["
"None, 'micro', 'macro', 'weighted', 'samples'].")
msg_ind = (r"Target is multilabel-indicator but average='binary'. Please "
r"choose another average setting, one of \["
r"None, 'micro', 'macro', 'weighted', 'samples'\].")

for y_true, y_pred, msg in [
(y_true_mc, y_pred_mc, msg_mc),
(y_true_ind, y_pred_ind, msg_ind),
]:
for metric in [precision_score, recall_score, f1_score,
partial(fbeta_score, beta=2)]:
assert_raise_message(ValueError, msg,
metric, y_true, y_pred)
with pytest.raises(ValueError, match=msg):
metric(y_true, y_pred)


def test__check_targets():
@@ -1757,20 +1762,20 @@ def test__check_targets():
except KeyError:
expected = EXPECTED[type2, type1]
if expected is None:
assert_raises(ValueError, _check_targets, y1, y2)
with pytest.raises(ValueError):
_check_targets(y1, y2)

if type1 != type2:
assert_raise_message(
ValueError,
"Classification metrics can't handle a mix of {0} and {1} "
"targets".format(type1, type2),
_check_targets, y1, y2)
err_msg = ("Classification metrics can't handle a mix "
"of {0} and {1} targets".format(type1, type2))
with pytest.raises(ValueError, match=err_msg):
_check_targets(y1, y2)

else:
if type1 not in (BIN, MC, IND):
assert_raise_message(ValueError,
"{0} is not supported".format(type1),
_check_targets, y1, y2)
err_msg = "{0} is not supported".format(type1)
with pytest.raises(ValueError, match=err_msg):
_check_targets(y1, y2)

else:
merged_type, y1out, y2out = _check_targets(y1, y2)
@@ -1781,7 +1786,8 @@
else:
assert_array_equal(y1out, np.squeeze(y1))
assert_array_equal(y2out, np.squeeze(y2))
assert_raises(ValueError, _check_targets, y1[:-1], y2)
with pytest.raises(ValueError):
_check_targets(y1[:-1], y2)

# Make sure seq of seq is not supported
y1 = [(1, 2,), (0, 2, 3)]
@@ -1790,7 +1796,8 @@
'Sequence of sequences are no longer supported; use a binary array'
' or sparse matrix instead - the MultiLabelBinarizer'
' transformer can convert to this format.')
assert_raise_message(ValueError, msg, _check_targets, y1, y2)
with pytest.raises(ValueError, match=msg):
_check_targets(y1, y2)


def test__check_targets_multiclass_with_both_y_true_and_y_pred_binary():
@@ -1844,9 +1851,8 @@ def test_hinge_loss_multiclass_missing_labels_with_labels_none():
])
error_message = ("Please include all labels in y_true "
"or pass labels as third argument")
assert_raise_message(ValueError,
error_message,
hinge_loss, y_true, pred_decision)
with pytest.raises(ValueError, match=error_message):
hinge_loss(y_true, pred_decision)


def test_hinge_loss_multiclass_with_missing_labels():
@@ -1928,7 +1934,8 @@ def test_log_loss():
# raise an error if the number of classes is not equal.
y_true = [1, 0, 2]
y_pred = [[0.2, 0.7], [0.6, 0.5], [0.4, 0.1]]
assert_raises(ValueError, log_loss, y_true, y_pred)
with pytest.raises(ValueError):
log_loss(y_true, y_pred)

# case when y_true is a string array object
y_true = ["ham", "spam", "spam", "ham"]
@@ -1941,14 +1948,15 @@
y_true = [2, 2]
y_pred = [[0.2, 0.7], [0.6, 0.5]]
y_score = np.array([[0.1, 0.9], [0.1, 0.9]])
error_str = ('y_true contains only one label (2). Please provide '
'the true labels explicitly through the labels argument.')
assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
error_str = (r'y_true contains only one label \(2\). Please provide '
r'the true labels explicitly through the labels argument.')
with pytest.raises(ValueError, match=error_str):
log_loss(y_true, y_pred)

y_pred = [[0.2, 0.7], [0.6, 0.5], [0.2, 0.3]]
error_str = ('Found input variables with inconsistent numbers of samples: '
'[3, 2]')
assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
with pytest.raises(ValueError, match=re.escape(error_str)):
log_loss(y_true, y_pred)

# works when the labels argument is used

@@ -1992,17 +2000,20 @@ def test_brier_score_loss():
true_score)
assert_almost_equal(brier_score_loss(2 * y_true - 1, y_pred),
true_score)
assert_raises(ValueError, brier_score_loss, y_true, y_pred[1:])
assert_raises(ValueError, brier_score_loss, y_true, y_pred + 1.)
assert_raises(ValueError, brier_score_loss, y_true, y_pred - 1.)
with pytest.raises(ValueError):
brier_score_loss(y_true, y_pred[1:])
with pytest.raises(ValueError):
brier_score_loss(y_true, y_pred + 1.)
with pytest.raises(ValueError):
brier_score_loss(y_true, y_pred - 1.)

# ensure an error is raised for multiclass y_true
y_true = np.array([0, 1, 2, 0])
y_pred = np.array([0.8, 0.6, 0.4, 0.2])
error_message = ("Only binary classification is supported. Labels "
"in y_true: {}".format(np.array([0, 1, 2])))
assert_raise_message(ValueError, error_message, brier_score_loss,
y_true, y_pred)
with pytest.raises(ValueError, match=re.escape(error_message)):
brier_score_loss(y_true, y_pred)

# calculate correctly when there's only one class in y_true
assert_almost_equal(brier_score_loss([-1], [0.4]), 0.16)
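
A note on the conversions above: `pytest.raises(..., match=...)` checks the message with `re.search`, so the string is treated as a regular expression. That is why messages containing brackets or parentheses are rewritten as escaped raw strings, or wrapped in `re.escape` as in the `brier_score_loss` test. A small sketch of both options, reusing an error message from the diff with a hypothetical raising function:

import re

import pytest


def fail_with_brackets():
    # Raises a message containing regex metacharacters ('[' and ')').
    raise ValueError("All labels must be in [0, n labels)")


def test_match_with_metacharacters():
    # Option 1: escape the metacharacters by hand in a raw string.
    with pytest.raises(ValueError,
                       match=r"All labels must be in \[0, n labels\)"):
        fail_with_brackets()

    # Option 2: let re.escape handle it.
    err_msg = "All labels must be in [0, n labels)"
    with pytest.raises(ValueError, match=re.escape(err_msg)):
        fail_with_brackets()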