
Commit 0eddca6

Author: Guillaume Lemaitre (committed)

Finish sensitivity and specificity

1 parent 2085360 commit 0eddca6

File tree: 2 files changed, +112 -152 lines changed


imblearn/metrics/classification.py (21 additions, 36 deletions)
@@ -40,7 +40,7 @@ def sensitivity_specificity_support(y_true, y_pred, labels=None,
 
     If ``pos_label is None`` and in binary classification, this function
     returns the average sensitivity and specificity if ``average``
-    is one of ``'micro'`` or 'weighted'``.
+    is one of ``'weighted'``.
 
     Parameters
     ----------
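
Note: the two quantities documented here follow the usual per-class definitions, sensitivity = TP / (TP + FN) (recall of the positive class) and specificity = TN / (TN + FP) (recall of the negative class). A minimal hand-rolled sketch for the binary case; the toy labels are illustrative and the snippet is not part of the package:

    import numpy as np

    y_true = np.array([1, 1, 1, 0, 0])
    y_pred = np.array([1, 1, 0, 0, 1])

    tp = np.sum((y_true == 1) & (y_pred == 1))  # 2
    fn = np.sum((y_true == 1) & (y_pred == 0))  # 1
    tn = np.sum((y_true == 0) & (y_pred == 0))  # 1
    fp = np.sum((y_true == 0) & (y_pred == 1))  # 1

    sensitivity = tp / (tp + fn)  # 2 / 3, recall on the positive class
    specificity = tn / (tn + fp)  # 1 / 2, recall on the negative class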
@@ -105,8 +105,7 @@ def sensitivity_specificity_support(y_true, y_pred, labels=None,
            <https://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_
 
     """
-
-    average_options = (None, 'micro', 'macro', 'weighted')
+    average_options = (None, 'macro', 'weighted')
     if average not in average_options and average != 'binary':
         raise ValueError('average has to be one of ' +
                          str(average_options))
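
Note: with 'micro' dropped from average_options, requesting it now falls through to the ValueError branch above, like any other unrecognised option. A hedged usage sketch, assuming the sensitivity_specificity_support signature exposed by imblearn.metrics at this commit; the toy labels are made up:

    import numpy as np
    from imblearn.metrics import sensitivity_specificity_support

    y_true = np.array([0, 0, 1, 1, 2, 2])
    y_pred = np.array([0, 1, 1, 1, 2, 0])

    # The remaining averaged variants.
    sens_macro, spec_macro, _ = sensitivity_specificity_support(
        y_true, y_pred, average='macro')
    sens_w, spec_w, _ = sensitivity_specificity_support(
        y_true, y_pred, average='weighted')

    # 'micro' is no longer accepted.
    try:
        sensitivity_specificity_support(y_true, y_pred, average='micro')
    except ValueError as exc:
        print(exc)  # average has to be one of (None, 'macro', 'weighted')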
@@ -154,24 +153,20 @@ def sensitivity_specificity_support(y_true, y_pred, labels=None,
         y_pred = le.transform(y_pred)
         sorted_labels = le.classes_
 
-    LOGGER.debug(y_true)
-    LOGGER.debug(y_pred)
-    LOGGER.debug(sorted_labels)
-
     LOGGER.debug('The number of labels is %s' % n_labels)
 
     # In a leave out strategy and for each label, compute:
     # TP, TN, FP, FN
     # These list contain an array in which each sample is labeled as
     # TP, TN, FP, FN
     list_tp = [np.bitwise_and((y_true == label), (y_pred == label))
-               for label in sorted_labels]
+               for label in range(sorted_labels.size)]
     list_tn = [np.bitwise_and((y_true != label), (y_pred != label))
-               for label in sorted_labels]
+               for label in range(sorted_labels.size)]
     list_fp = [np.bitwise_and((y_true != label), (y_pred == label))
-               for label in sorted_labels]
+               for label in range(sorted_labels.size)]
     list_fn = [np.bitwise_and((y_true == label), (y_pred != label))
-               for label in sorted_labels]
+               for label in range(sorted_labels.size)]
 
     # Compute the sum for each type
     # We keep only the counting corresponding to True values
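
Note: the switch from iterating over sorted_labels to range(sorted_labels.size) matters because y_true and y_pred have just been re-encoded by LabelEncoder into the integers 0..n_classes-1, so the original class values may no longer appear in the arrays at all. A standalone sketch of the difference; the toy labels are made up:

    import numpy as np
    from sklearn.preprocessing import LabelEncoder

    y_true = np.array([2, 2, 5, 7])
    y_pred = np.array([2, 5, 5, 7])

    le = LabelEncoder()
    y_true_enc = le.fit_transform(y_true)  # [0, 0, 1, 2]
    y_pred_enc = le.transform(y_pred)      # [0, 1, 1, 2]
    sorted_labels = le.classes_            # [2, 5, 7]

    # Comparing the encoded arrays against the original label values misses
    # every class whose value is not itself a valid code (here 5 and 7).
    tp_wrong = [np.bitwise_and(y_true_enc == label, y_pred_enc == label)
                for label in sorted_labels]

    # Iterating over the encoded indices 0..n_classes-1 counts one true
    # positive per class, as expected.
    tp_fixed = [np.bitwise_and(y_true_enc == label, y_pred_enc == label)
                for label in range(sorted_labels.size)]

    print([int(tp.sum()) for tp in tp_wrong])  # [1, 0, 0]
    print([int(tp.sum()) for tp in tp_fixed])  # [1, 1, 1]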
@@ -197,42 +192,32 @@ def sensitivity_specificity_support(y_true, y_pred, labels=None,
     # Sort the support
     support = support[indices]
 
-
     LOGGER.debug('The indices which are retained are %s' % indices)
 
-    LOGGER.debug('TP: %s' % tp_sum)
-    LOGGER.debug('TN: %s' % tn_sum)
-    LOGGER.debug('FP: %s' % fp_sum)
-    LOGGER.debug('FN: %s' % fn_sum)
-
     tp_sum = tp_sum[indices]
     tn_sum = tn_sum[indices]
     fp_sum = fp_sum[indices]
     fn_sum = fn_sum[indices]
 
-    if average == 'micro':
-        tp_sum = np.array([tp_sum.sum()])
-        tn_sum = np.array([tn_sum.sum()])
-        fp_sum = np.array([fp_sum.sum()])
-        fn_sum = np.array([fn_sum.sum()])
-
-        LOGGER.debug('Did we do the average micro %s' % tp_sum)
-
     LOGGER.debug('Computed the necessary stats for the sensitivity and'
                  ' specificity')
 
-    # Compute the sensitivity and specificity
-    sensitivity = [_prf_divide(tp, tp + fn, 'sensitivity', 'tp + fn', average,
-                               warn_for) for tp, fn in zip(tp_sum, fn_sum)]
-    specificity = [_prf_divide(tn, tn + fp, 'specificity', 'tn + fp', average,
-                               warn_for) for tn, fp in zip(tn_sum, fp_sum)]
-
-    LOGGER.debug('Sensitivity = %s - Specificity = %s' % (sensitivity,
-                                                          specificity))
+    LOGGER.debug(tp_sum)
+    LOGGER.debug(tn_sum)
+    LOGGER.debug(fp_sum)
+    LOGGER.debug(fn_sum)
 
-    LOGGER.debug('Computed the sensitivity and specificity for each class')
-    LOGGER.debug('The lengths of those two metrics are: %s - %s',
-                 len(sensitivity), len(specificity))
+    # Compute the sensitivity and specificity
+    with np.errstate(divide='ignore', invalid='ignore'):
+        sensitivity = _prf_divide(tp_sum, tp_sum + fn_sum, 'sensitivity',
+                                  'tp + fn', average, warn_for)
+        specificity = _prf_divide(tn_sum, tn_sum + fp_sum, 'specificity',
+                                  'tn + fp', average, warn_for)
+
+    # sensitivity = [_prf_divide(tp, tp + fn, 'sensitivity', 'tp + fn', average,
+    #                            warn_for) for tp, fn in zip(tp_sum, fn_sum)]
+    # specificity = [_prf_divide(tn, tn + fp, 'specificity', 'tn + fp', average,
+    #                            warn_for) for tn, fp in zip(tn_sum, fp_sum)]
 
     # If we need to weight the results
     if average == 'weighted':

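Note: the with np.errstate(divide='ignore', invalid='ignore') block silences NumPy's warnings when a class has no positive (tp + fn == 0) or no negative (tn + fp == 0) samples, so the division helper can handle those cases itself. _prf_divide is scikit-learn's private helper; the safe_divide below is only an illustrative stand-in for the same vectorised pattern, not its actual implementation:

    import numpy as np

    def safe_divide(numerator, denominator):
        # Element-wise division that returns 0.0 wherever the denominator is 0.
        with np.errstate(divide='ignore', invalid='ignore'):
            result = numerator / denominator
        result[denominator == 0] = 0.0
        return result

    tp_sum = np.array([3, 0, 2], dtype=float)
    fn_sum = np.array([1, 0, 2], dtype=float)

    # Vectorised over all classes at once instead of a per-class Python loop.
    sensitivity = safe_divide(tp_sum, tp_sum + fn_sum)  # [0.75, 0.0, 0.5]
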
imblearn/metrics/tests/test_classification.py (91 additions, 116 deletions)
@@ -2,6 +2,8 @@
 
 from __future__ import division, print_function
 
+from functools import partial
+
 import numpy as np
 
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
@@ -12,6 +14,8 @@
 from sklearn import datasets
 from sklearn import svm
 
+from sklearn.preprocessing import label_binarize
+from sklearn.utils.testing import assert_not_equal
 from sklearn.utils.validation import check_random_state
 
 from imblearn.metrics import sensitivity_specificity_support
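
Note on the new imports: label_binarize turns a label vector into the binary indicator matrix that test_sensitivity_specificity_error_multilabels feeds to sensitivity_score to check that multilabel input is rejected, and assert_not_equal is used to make sure the restricted-label averages are genuinely different from the all-labels ones. A small sketch of the indicator format (output shown as comments):

    import numpy as np
    from sklearn.preprocessing import label_binarize

    y_true = [1, 3, 3, 2]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    print(y_true_bin)
    # [[0 1 0 0 0]
    #  [0 0 0 1 0]
    #  [0 0 0 1 0]
    #  [0 0 1 0 0]]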
@@ -94,113 +98,93 @@ def test_sensitivity_specificity_support_binary():
9498
assert_array_almost_equal(spec, 0.88, 2)
9599

96100

97-
# def test_precision_recall_f_binary_single_class():
98-
# # Test precision, recall and F1 score behave with a single positive or
99-
# # negative class
100-
# # Such a case may occur with non-stratified cross-validation
101-
# assert_equal(1., precision_score([1, 1], [1, 1]))
102-
# assert_equal(1., recall_score([1, 1], [1, 1]))
103-
# assert_equal(1., f1_score([1, 1], [1, 1]))
104-
105-
# assert_equal(0., precision_score([-1, -1], [-1, -1]))
106-
# assert_equal(0., recall_score([-1, -1], [-1, -1]))
107-
# assert_equal(0., f1_score([-1, -1], [-1, -1]))
108-
109-
110-
# @ignore_warnings
111-
# def test_precision_recall_f_extra_labels():
112-
# # Test handling of explicit additional (not in input) labels to PRF
113-
# y_true = [1, 3, 3, 2]
114-
# y_pred = [1, 1, 3, 2]
115-
# y_true_bin = label_binarize(y_true, classes=np.arange(5))
116-
# y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
117-
# data = [(y_true, y_pred),
118-
# (y_true_bin, y_pred_bin)]
119-
120-
# for i, (y_true, y_pred) in enumerate(data):
121-
# # No average: zeros in array
122-
# actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
123-
# average=None)
124-
# assert_array_almost_equal([0., 1., 1., .5, 0.], actual)
125-
126-
# # Macro average is changed
127-
# actual = recall_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
128-
# average='macro')
129-
# assert_array_almost_equal(np.mean([0., 1., 1., .5, 0.]), actual)
130-
131-
# # No effect otheriwse
132-
# for average in ['micro', 'weighted', 'samples']:
133-
# if average == 'samples' and i == 0:
134-
# continue
135-
# assert_almost_equal(recall_score(y_true, y_pred,
136-
# labels=[0, 1, 2, 3, 4],
137-
# average=average),
138-
# recall_score(y_true, y_pred, labels=None,
139-
# average=average))
140-
141-
# # Error when introducing invalid label in multilabel case
142-
# # (although it would only affect performance if average='macro'/None)
143-
# for average in [None, 'macro', 'micro', 'samples']:
144-
# assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
145-
# labels=np.arange(6), average=average)
146-
# assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
147-
# labels=np.arange(-1, 4), average=average)
148-
149-
150-
# @ignore_warnings
151-
# def test_precision_recall_f_ignored_labels():
152-
# # Test a subset of labels may be requested for PRF
153-
# y_true = [1, 1, 2, 3]
154-
# y_pred = [1, 3, 3, 3]
155-
# y_true_bin = label_binarize(y_true, classes=np.arange(5))
156-
# y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
157-
# data = [(y_true, y_pred),
158-
# (y_true_bin, y_pred_bin)]
159-
160-
# for i, (y_true, y_pred) in enumerate(data):
161-
# recall_13 = partial(recall_score, y_true, y_pred, labels=[1, 3])
162-
# recall_all = partial(recall_score, y_true, y_pred, labels=None)
163-
164-
# assert_array_almost_equal([.5, 1.], recall_13(average=None))
165-
# assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
166-
# assert_almost_equal((.5 * 2 + 1. * 1) / 3,
167-
# recall_13(average='weighted'))
168-
# assert_almost_equal(2. / 3, recall_13(average='micro'))
169-
170-
# # ensure the above were meaningful tests:
171-
# for average in ['macro', 'weighted', 'micro']:
172-
# assert_not_equal(recall_13(average=average),
173-
# recall_all(average=average))
174-
175-
176-
# @ignore_warnings
177-
# def test_precision_recall_fscore_support_errors():
178-
# y_true, y_pred, _ = make_prediction(binary=True)
179-
180-
# # Bad beta
181-
# assert_raises(ValueError, precision_recall_fscore_support,
182-
# y_true, y_pred, beta=0.0)
183-
184-
# # Bad pos_label
185-
# assert_raises(ValueError, precision_recall_fscore_support,
186-
# y_true, y_pred, pos_label=2, average='binary')
187-
188-
# # Bad average option
189-
# assert_raises(ValueError, precision_recall_fscore_support,
190-
# [0, 1, 2], [1, 2, 0], average='mega')
191-
192-
193-
# def test_precision_recall_f_unused_pos_label():
194-
# # Check warning that pos_label unused when set to non-default value
195-
# # but average != 'binary'; even if data is binary.
196-
# assert_warns_message(UserWarning,
197-
# "Note that pos_label (set to 2) is "
198-
# "ignored when average != 'binary' (got 'macro'). You "
199-
# "may use labels=[pos_label] to specify a single "
200-
# "positive class.", precision_recall_fscore_support,
201-
# [1, 2, 1], [1, 2, 2], pos_label=2, average='macro')
202-
203-
def test_precision_recall_f1_score_multiclass():
101+
def test_sensitivity_specificity_binary_single_class():
102+
# Test sensitivity and specificity score behave with a single positive or
103+
# negative class
104+
# Such a case may occur with non-stratified cross-validation
105+
assert_equal(1., sensitivity_score([1, 1], [1, 1]))
106+
assert_equal(0., specificity_score([1, 1], [1, 1]))
107+
108+
assert_equal(0., sensitivity_score([-1, -1], [-1, -1]))
109+
assert_equal(0., specificity_score([-1, -1], [-1, -1]))
110+
111+
112+
def test_sensitivity_specificity_error_multilabels():
113+
# Test either if an error is raised when the input are multilabels
114+
y_true = [1, 3, 3, 2]
115+
y_pred = [1, 1, 3, 2]
116+
y_true_bin = label_binarize(y_true, classes=np.arange(5))
117+
y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
118+
119+
assert_raises(ValueError, sensitivity_score, y_true_bin, y_pred_bin)
120+
121+
@ignore_warnings
122+
def test_sensitivity_specifiicity_extra_labels():
123+
# Test handling of explicit additional (not in input) labels to SS
124+
y_true = [1, 3, 3, 2]
125+
y_pred = [1, 1, 3, 2]
126+
127+
actual = sensitivity_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
128+
average=None)
129+
assert_array_almost_equal([0., 1., 1., .5, 0.], actual)
130+
131+
# Macro average is changed
132+
actual = sensitivity_score(y_true, y_pred, labels=[0, 1, 2, 3, 4],
133+
average='macro')
134+
assert_array_almost_equal(np.mean([0., 1., 1., .5, 0.]), actual)
135+
136+
# Weighted average is changed
137+
assert_almost_equal(sensitivity_score(y_true, y_pred,
138+
labels=[0, 1, 2, 3, 4],
139+
average='weighted'),
140+
sensitivity_score(y_true, y_pred, labels=None,
141+
average='weighted'))
142+
143+
@ignore_warnings
144+
def test_sensitivity_specificity_f_ignored_labels():
145+
# Test a subset of labels may be requested for SS
146+
y_true = [1, 1, 2, 3]
147+
y_pred = [1, 3, 3, 3]
148+
149+
sensitivity_13 = partial(sensitivity_score, y_true, y_pred, labels=[1, 3])
150+
sensitivity_all = partial(sensitivity_score, y_true, y_pred, labels=None)
151+
152+
assert_array_almost_equal([.5, 1.], sensitivity_13(average=None))
153+
assert_almost_equal((.5 + 1.) / 2, sensitivity_13(average='macro'))
154+
assert_almost_equal((.5 * 2 + 1. * 1) / 3,
155+
sensitivity_13(average='weighted'))
156+
157+
# ensure the above were meaningful tests:
158+
for average in ['macro', 'weighted']:
159+
assert_not_equal(sensitivity_13(average=average),
160+
sensitivity_all(average=average))
161+
162+
163+
@ignore_warnings
164+
def test_sensitivity_specificity_support_errors():
165+
y_true, y_pred, _ = make_prediction(binary=True)
166+
167+
# Bad pos_label
168+
assert_raises(ValueError, sensitivity_specificity_support,
169+
y_true, y_pred, pos_label=2, average='binary')
170+
171+
# Bad average option
172+
assert_raises(ValueError, sensitivity_specificity_support,
173+
[0, 1, 2], [1, 2, 0], average='mega')
174+
175+
176+
def test_sensitivity_specificity_unused_pos_label():
177+
# Check warning that pos_label unused when set to non-default value
178+
# but average != 'binary'; even if data is binary.
179+
assert_warns_message(UserWarning,
180+
"Note that pos_label (set to 2) is "
181+
"ignored when average != 'binary' (got 'macro'). You "
182+
"may use labels=[pos_label] to specify a single "
183+
"positive class.", sensitivity_specificity_support,
184+
[1, 2, 1], [1, 2, 2], pos_label=2, average='macro')
185+
186+
187+
def test_sensitivity_specificity_multiclass():
204188
# Test Precision Recall and F1 Score for multiclass classification task
205189
y_true, y_pred, _ = make_prediction(binary=False)
206190

@@ -212,15 +196,6 @@ def test_precision_recall_f1_score_multiclass():
212196
assert_array_equal(supp, [24, 31, 20])
213197

214198
# averaging tests
215-
spec = specificity_score(y_true, y_pred, pos_label=1, average='micro')
216-
assert_array_almost_equal(spec, 0.77, 2)
217-
218-
sens = sensitivity_score(y_true, y_pred, average='micro')
219-
assert_array_almost_equal(sens, 0.53, 2)
220-
221-
spec = specificity_score(y_true, y_pred, average='macro')
222-
assert_array_almost_equal(spec, 0.77, 2)
223-
224199
sens = sensitivity_score(y_true, y_pred, average='macro')
225200
assert_array_almost_equal(sens, 0.60, 2)
226201

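Note: a quick check of the arithmetic asserted in test_sensitivity_specificity_f_ignored_labels. With y_true = [1, 1, 2, 3] and y_pred = [1, 3, 3, 3], class 1 recovers one of its two samples (sensitivity 0.5) and class 3 recovers its single sample (sensitivity 1.0), so the macro average is (0.5 + 1.0) / 2 = 0.75 while the support-weighted average is (0.5 * 2 + 1.0 * 1) / 3, roughly 0.833. The same numbers can be reproduced with plain NumPy; this is only an illustrative check, not part of the test file:

    import numpy as np

    per_class_sensitivity = np.array([0.5, 1.0])  # classes 1 and 3
    support = np.array([2, 1])                    # true samples per class

    macro = per_class_sensitivity.mean()                           # 0.75
    weighted = np.average(per_class_sensitivity, weights=support)  # ~0.833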