"""
=======================================
Metrics specific to imbalanced learning
=======================================

Specific metrics have been developed to evaluate classifiers which
have been trained using imbalanced data. `imblearn` mainly provides
two additional metrics which are not implemented in `sklearn`: (i)
geometric mean and (ii) indexed balanced accuracy.
"""

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

from imblearn.metrics import (geometric_mean_score,
                              make_indexed_balanced_accuracy)
# Import the estimators directly: the previous ``over_sampling as os`` alias
# shadowed the name of the standard-library ``os`` module.
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline

print(__doc__)

RANDOM_STATE = 42

# Generate a dataset
# NOTE(review): ``n_classes=3`` is requested but only two weights are given.
# scikit-learn documents that a missing last weight is inferred from the
# others; since 0.1 + 0.9 already sums to 1, the third class presumably ends
# up with no samples, making this effectively a binary problem. Confirm that
# this is intended before changing either parameter.
X, y = datasets.make_classification(n_classes=3, class_sep=2,
                                    weights=[0.1, 0.9], n_informative=10,
                                    n_redundant=1, flip_y=0, n_features=20,
                                    n_clusters_per_class=4, n_samples=5000,
                                    random_state=RANDOM_STATE)

# Chain the over-sampler and the classifier in a single pipeline
pipeline = make_pipeline(SMOTE(random_state=RANDOM_STATE),
                         LinearSVC(random_state=RANDOM_STATE))

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=RANDOM_STATE)

# Train the classifier with balancing
pipeline.fit(X_train, y_train)

# Test the classifier and get the prediction
y_pred_bal = pipeline.predict(X_test)

###############################################################################
# The geometric mean corresponds to the square root of the product of the
# sensitivity and specificity. Combining the two metrics should account for
# the balancing of the dataset.

print('The geometric mean is {}'.format(geometric_mean_score(
    y_test,
    y_pred_bal)))

###############################################################################
# The indexed balanced accuracy can transform any metric to be used in
# imbalanced learning problems.

# Evaluate the same wrapped metric for each weighting factor instead of
# repeating the wrap-and-print code once per value of alpha.
for alpha in (0.1, 0.5):
    geo_mean = make_indexed_balanced_accuracy(alpha=alpha, squared=True)(
        geometric_mean_score)

    print('The IBA using alpha = {} and the geometric mean: {}'.format(
        alpha, geo_mean(
            y_test,
            y_pred_bal)))