|
| 1 | +""" |
| 2 | +============================================= |
| 3 | +Multiclass classification with under-sampling |
| 4 | +============================================= |
| 5 | +
|
| 6 | +Some balancing methods allow for balancing datasets with multiple classes. |
| 7 | +We provide an example to illustrate the use of those methods, which does |
| 8 | +not differ from the binary case. |
| 9 | +
|
| 10 | +""" |
| 11 | + |
| 12 | +from sklearn.datasets import load_iris |
| 13 | +from sklearn.svm import LinearSVC |
| 14 | +from sklearn.model_selection import train_test_split |
| 15 | + |
| 16 | +from imblearn.under_sampling import NearMiss |
| 17 | +from imblearn.pipeline import make_pipeline |
| 18 | +from imblearn.metrics import classification_report_imbalanced |
| 19 | + |
| 20 | +print(__doc__) |
| 21 | + |
| 22 | +RANDOM_STATE = 42 |
| 23 | + |
| 24 | +# Load the iris dataset |
| 25 | +iris = load_iris() |
| 26 | +# Make the dataset imbalanced: keep only half of the first class |
| 27 | +# (rows 25 onward); note that the slice also drops the last sample |
| 28 | +iris.data = iris.data[25:-1, :] |
| 29 | +iris.target = iris.target[25:-1] |
| 30 | + |
| 31 | +X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, |
| 32 | + random_state=RANDOM_STATE) |
| 33 | + |
| 34 | +# Create a pipeline |
| 35 | +pipeline = make_pipeline(NearMiss(version=2), LinearSVC()) |
| 36 | +pipeline.fit(X_train, y_train) |
| 37 | + |
| 38 | +# Classify and report the results |
| 39 | +print(classification_report_imbalanced(y_test, pipeline.predict(X_test))) |
0 commit comments