Skip to content

Commit 4eab795

Browse files
author
Guillaume Lemaitre
committed
finish the example
1 parent 813e240 commit 4eab795

File tree

4 files changed

+80
-4
lines changed

4 files changed

+80
-4
lines changed

examples/applications/plot_multi_class_under_sampling.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
random_state=RANDOM_STATE)
3333

3434
# Create a pipeline
35-
pipeline = make_pipeline(NearMiss(version=2), LinearSVC())
35+
pipeline = make_pipeline(NearMiss(version=2, random_state=RANDOM_STATE),
36+
LinearSVC(random_state=RANDOM_STATE))
3637
pipeline.fit(X_train, y_train)
3738

3839
# Classify and report the results

examples/evaluation/plot_classification_report.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Evaluate classification by compiling a report
44
=============================================
55
6-
Specific metrics has been developed to evaluate classifier which has been
6+
Specific metrics have been developed to evaluate classifier which has been
77
trained using imbalanced data. `imblearn` provides a classification
88
report similar to `sklearn`, with additional metrics specific to imbalanced
99
learning problem.
@@ -28,7 +28,8 @@
2828
n_clusters_per_class=4, n_samples=5000,
2929
random_state=RANDOM_STATE)
3030

31-
pipeline = pl.make_pipeline(os.SMOTE(), LinearSVC())
31+
pipeline = pl.make_pipeline(os.SMOTE(random_state=RANDOM_STATE),
32+
LinearSVC(random_state=RANDOM_STATE))
3233

3334
# Split the data
3435
X_train, X_test, y_train, y_test = train_test_split(X, y,

examples/evaluation/plot_metrics.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
"""
=======================================
Metrics specific to imbalanced learning
=======================================

Specific metrics have been developed to evaluate classifiers which have
been trained using imbalanced data. `imblearn` provides mainly
two additional metrics which are not implemented in `sklearn`: (i)
geometric mean and (ii) indexed balanced accuracy.
"""

from sklearn import datasets
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

# NOTE(review): the `os` alias shadows the stdlib `os` module; kept because
# the sibling example scripts in this commit use the same alias.
from imblearn import over_sampling as os
from imblearn import pipeline as pl
from imblearn.metrics import (geometric_mean_score,
                              make_indexed_balanced_accuracy)

print(__doc__)

# Single seed shared by every random component so the example is reproducible.
RANDOM_STATE = 42

# Generate an imbalanced multi-class dataset (weights skew the class sizes).
X, y = datasets.make_classification(n_classes=3, class_sep=2,
                                    weights=[0.1, 0.9], n_informative=10,
                                    n_redundant=1, flip_y=0, n_features=20,
                                    n_clusters_per_class=4, n_samples=5000,
                                    random_state=RANDOM_STATE)

# Over-sample with SMOTE before fitting the linear SVM, as a single pipeline.
pipeline = pl.make_pipeline(os.SMOTE(random_state=RANDOM_STATE),
                            LinearSVC(random_state=RANDOM_STATE))

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=RANDOM_STATE)

# Train the classifier with balancing
pipeline.fit(X_train, y_train)

# Test the classifier and get the prediction
y_pred_bal = pipeline.predict(X_test)

###############################################################################
# The geometric mean corresponds to the square root of the product of the
# sensitivity and specificity. Combining the two metrics should account for
# the balancing of the dataset.

print('The geometric mean is {}'.format(geometric_mean_score(
    y_test,
    y_pred_bal)))

###############################################################################
# The indexed balanced accuracy can transform any metric to be used in
# imbalanced learning problems.

# The same IBA-weighted geometric mean is reported for two values of alpha;
# the loop replaces the original's two copy-pasted blocks and prints
# identical output.
for alpha in (0.1, 0.5):
    geo_mean = make_indexed_balanced_accuracy(alpha=alpha, squared=True)(
        geometric_mean_score)
    print('The IBA using alpha = {} and the geometric mean: {}'.format(
        alpha, geo_mean(
            y_test,
            y_pred_bal)))

examples/model_selection/plot_validation_curve.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
weights=[0.1, 0.9], n_informative=10,
3030
n_redundant=1, flip_y=0, n_features=20,
3131
n_clusters_per_class=4, n_samples=5000,
32-
random_state=10)
32+
random_state=RANDOM_STATE)
3333
smote = os.SMOTE(random_state=RANDOM_STATE)
3434
cart = tree.DecisionTreeClassifier(random_state=RANDOM_STATE)
3535
pipeline = pl.make_pipeline(smote, cart)

0 commit comments

Comments
 (0)