Commit 7b71f5b

MAINT compatibility scikit-learn 1.3 (#999)
1 parent 8a42dfc · commit 7b71f5b


47 files changed: +1068 −405 lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ version: 2
 jobs:
   doc:
     docker:
-      - image: circleci/python:3.7.7-buster
+      - image: cimg/python:3.8.12
     environment:
       - USERNAME: "glemaitre"
       - ORGANIZATION: "imbalanced-learn"

.pre-commit-config.yaml

Lines changed: 9 additions & 12 deletions
@@ -1,26 +1,23 @@
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v2.3.0
+  rev: v4.3.0
   hooks:
   - id: check-yaml
   - id: end-of-file-fixer
   - id: trailing-whitespace
 - repo: https://github.com/psf/black
-  rev: 22.3.0
+  rev: 23.3.0
   hooks:
   - id: black
-- repo: https://gitlab.com/pycqa/flake8
-  rev: 3.9.2
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.0.272
   hooks:
-  - id: flake8
-    types: [file, python]
+  - id: ruff
+    args: ["--fix", "--show-source"]
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.782
+  rev: v1.3.0
   hooks:
   - id: mypy
-    files: sklearn/
+    files: imblearn/
     additional_dependencies: [pytest==6.2.4]
-- repo: https://github.com/PyCQA/isort
-  rev: 5.10.1
-  hooks:
-  - id: isort

README.rst

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@

 .. |PythonMinVersion| replace:: 3.8
 .. |NumPyMinVersion| replace:: 1.17.3
-.. |SciPyMinVersion| replace:: 1.3.2
+.. |SciPyMinVersion| replace:: 1.5.0
 .. |ScikitLearnMinVersion| replace:: 1.0.2
 .. |MatplotlibMinVersion| replace:: 3.1.2
 .. |PandasMinVersion| replace:: 1.0.5

azure-pipelines.yml

Lines changed: 29 additions & 13 deletions
@@ -11,7 +11,7 @@ jobs:
 - job: git_commit
   displayName: Get Git Commit
   pool:
-    vmImage: ubuntu-20.04
+    vmImage: ubuntu-22.04
   steps:
   - bash: |
       set -ex
@@ -38,21 +38,21 @@
     )
   displayName: Linting
   pool:
-    vmImage: ubuntu-20.04
+    vmImage: ubuntu-22.04
   steps:
   - task: UsePythonVersion@0
     inputs:
       versionSpec: '3.9'
   - bash: |
       # Include pytest compatibility with mypy
-      pip install pytest flake8 mypy==0.782 black==22.3 isort
+      pip install flake8 pytest mypy==1.3.0 black==23.3 ruff==0.0.272
     displayName: Install linters
   - bash: |
       black --check --diff .
     displayName: Run black
   - bash: |
-      isort --check --diff .
-    displayName: Run isort
+      ruff check --show-source .
+    displayName: Run ruff
   - bash: |
       ./build_tools/azure/linting.sh
     displayName: Run linting
@@ -63,7 +63,7 @@
 - template: build_tools/azure/posix.yml
   parameters:
     name: Linux_Nightly
-    vmImage: ubuntu-20.04
+    vmImage: ubuntu-22.04
     dependsOn: [git_commit, linting]
     condition: |
       and(
@@ -86,7 +86,7 @@
 - template: build_tools/azure/posix.yml
   parameters:
     name: Linux_Runs
-    vmImage: ubuntu-20.04
+    vmImage: ubuntu-22.04
     dependsOn: [git_commit]
     condition: |
       and(
@@ -125,7 +125,7 @@
 - template: build_tools/azure/posix.yml
   parameters:
     name: Linux
-    vmImage: ubuntu-20.04
+    vmImage: ubuntu-22.04
     dependsOn: [linting, git_commit]
     condition: |
       and(
@@ -144,7 +144,7 @@
     THREADPOOLCTL_VERSION: 'min'
     COVERAGE: 'false'
   # Linux + Python 3.8 build with OpenBLAS and without SITE_JOBLIB
-  py38_conda_defaults_openblas:
+  py38_conda_conda_forge_openblas:
     DISTRIB: 'conda'
     CONDA_CHANNEL: 'conda-forge'
     PYTHON_VERSION: '3.8'
@@ -170,6 +170,13 @@
     TEST_DOCSTRINGS: 'true'
     CHECK_WARNINGS: 'false'
     SKLEARN_VERSION: '1.1.3'
+  pylatest_pip_openblas_sklearn_intermediate_bis:
+    DISTRIB: 'conda-pip-latest'
+    PYTHON_VERSION: '3.10'
+    TEST_DOCS: 'true'
+    TEST_DOCSTRINGS: 'true'
+    CHECK_WARNINGS: 'false'
+    SKLEARN_VERSION: '1.2.2'
   pylatest_pip_tensorflow:
     DISTRIB: 'conda-pip-latest-tensorflow'
     CONDA_CHANNEL: 'conda-forge'
@@ -263,12 +270,21 @@
     CONDA_CHANNEL: 'conda-forge'
     CPU_COUNT: '3'
     TEST_DOCS: 'true'
-  pylatest_conda_mkl_no_openmp:
+  # TODO: re-enable when we find out why MKL on defaults segfaults
+  # It seems that scikit-learn from defaults channel is built with LLVM/CLANG OMP
+  # while we use MKL OMP. This could be the cause of the segfaults.
+  # pylatest_conda_mkl_no_openmp:
+  #   DISTRIB: 'conda'
+  #   BLAS: 'mkl'
+  #   SKLEARN_SKIP_OPENMP_TEST: 'true'
+  #   CPU_COUNT: '3'
+  #   TEST_DOCS: 'true'
+  conda_conda_forge_openblas:
     DISTRIB: 'conda'
-    BLAS: 'mkl'
-    SKLEARN_SKIP_OPENMP_TEST: 'true'
-    CPU_COUNT: '3'
+    CONDA_CHANNEL: 'conda-forge'
+    BLAS: 'openblas'
     TEST_DOCS: 'true'
+    CPU_COUNT: '3'

 - template: build_tools/azure/windows.yml
   parameters:

build_tools/azure/linting.sh

Lines changed: 0 additions & 11 deletions
@@ -4,9 +4,6 @@ set -e
 # pipefail is necessary to propagate exit codes
 set -o pipefail

-flake8 --show-source .
-echo -e "No problem detected by flake8\n"
-
 # For docstrings and warnings of deprecated attributes to be rendered
 # properly, the property decorator must come before the deprecated decorator
 # (else they are treated as functions)
@@ -33,11 +30,3 @@ then
     echo "$doctest_directive"
     exit 1
 fi
-
-joblib_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/fixes.py")"
-
-if [ ! -z "$joblib_import" ]; then
-    echo "Use from sklearn.utils.fixes import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:"
-    echo "$joblib_import"
-    exit 1
-fi

build_tools/azure/test_script.sh

Lines changed: 3 additions & 0 deletions
@@ -12,6 +12,9 @@ mkdir -p $TEST_DIR
 cp setup.cfg $TEST_DIR
 cd $TEST_DIR

+# python -c "import joblib; print(f'Number of cores (physical): \
+#     {joblib.cpu_count()} ({joblib.cpu_count(only_physical_cores=True)})')"
+# python -c "import sklearn; sklearn.show_versions()"
 python -c "import imblearn; imblearn.show_versions()"

 if ! command -v conda &> /dev/null

build_tools/circle/build_doc.sh

Lines changed: 7 additions & 6 deletions
@@ -89,12 +89,13 @@ if [[ `type -t deactivate` ]]; then
     deactivate
 fi

-# Install dependencies with miniconda
-wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh \
-    -O miniconda.sh
-chmod +x miniconda.sh && ./miniconda.sh -b -p $MINICONDA_PATH
-export PATH="$MINICONDA_PATH/bin:$PATH"
-conda update --yes --quiet conda
+MAMBAFORGE_PATH=$HOME/mambaforge
+# Install dependencies with mamba
+wget -q https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh \
+    -O mambaforge.sh
+chmod +x mambaforge.sh && ./mambaforge.sh -b -p $MAMBAFORGE_PATH
+export PATH="$MAMBAFORGE_PATH/bin:$PATH"
+mamba update --yes --quiet conda

 # imports get_dep
 source build_tools/shared.sh

doc/Makefile

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 #

 # You can set these variables from the command line.
-SPHINXOPTS =
+SPHINXOPTS = -v
 SPHINXBUILD = sphinx-build
 PAPER =
 BUILDDIR = _build

doc/_templates/breadcrumbs.html

Lines changed: 0 additions & 4 deletions
This file was deleted.

doc/common_pitfalls.rst

Lines changed: 7 additions & 9 deletions
@@ -53,10 +53,9 @@ increase the effect of the wrongdoings::

 Let's first check the balancing ratio on this dataset::

-    >>> y.value_counts(normalize=True)
-    <=50K    0.98801
-    >50K     0.01199
-    Name: class, dtype: float64
+    >>> from collections import Counter
+    >>> {key: value / len(y) for key, value in Counter(y).items()}
+    {'<=50K': 0.988..., '>50K': 0.011...}

 To later highlight some of the issue, we will keep aside a left-out set that we
 will not use for the evaluation of the model::
@@ -72,7 +71,6 @@ classifier, without any preprocessing to alleviate the bias toward the majority
 class. We evaluate the generalization performance of the classifier via
 cross-validation::

-    >>> from sklearn.experimental import enable_hist_gradient_boosting
     >>> from sklearn.ensemble import HistGradientBoostingClassifier
     >>> from sklearn.model_selection import cross_validate
     >>> model = HistGradientBoostingClassifier(random_state=0)
@@ -130,9 +128,9 @@ cross-validation::
     ...     f"{cv_results['test_score'].std():.3f}"
     ... )
     Balanced accuracy mean +/- std. dev.: 0.724 +/- 0.042
-
-The cross-validation performance looks good, but evaluating the classifiers
-on the left-out data shows a different picture::
+
+The cross-validation performance looks good, but evaluating the classifiers
+on the left-out data shows a different picture::

     >>> scores = []
     >>> for fold_id, cv_model in enumerate(cv_results["estimator"]):
@@ -147,7 +145,7 @@ on the left-out data shows a different picture::
     ...     )
     Balanced accuracy mean +/- std. dev.: 0.698 +/- 0.014

-We see that the performance is now worse than the cross-validated performance.
+We see that the performance is now worse than the cross-validated performance.
 Indeed, the data leakage gave us too optimistic results due to the reason
 stated earlier in this section.

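Note on the doctest change above: the pandas-specific value_counts(normalize=True) output is replaced by a framework-agnostic collections.Counter computation. A minimal sketch showing the two agree (the toy series below is hypothetical; the real docs use the Adult census data):

    from collections import Counter

    import pandas as pd

    # Hypothetical toy target with roughly the same ~99/1 imbalance as the docs.
    y = pd.Series(["<=50K"] * 988 + [">50K"] * 12, name="class")

    ratios = {key: value / len(y) for key, value in Counter(y).items()}
    print(ratios)                           # {'<=50K': 0.988, '>50K': 0.012}
    print(y.value_counts(normalize=True))   # same ratios, pandas-flavored output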

doc/over_sampling.rst

Lines changed: 5 additions & 5 deletions
@@ -38,10 +38,10 @@ randomly sampling with replacement the current available samples. The
 The augmented data set should be used instead of the original data set to train
 a classifier::

-    >>> from sklearn.svm import LinearSVC
-    >>> clf = LinearSVC()
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> clf = LogisticRegression()
     >>> clf.fit(X_resampled, y_resampled)
-    LinearSVC(...)
+    LogisticRegression(...)

 In the figure below, we compare the decision functions of a classifier trained
 using the over-sampled data set and the original data set.
@@ -108,11 +108,11 @@ the same manner::
     >>> X_resampled, y_resampled = SMOTE().fit_resample(X, y)
     >>> print(sorted(Counter(y_resampled).items()))
     [(0, 4674), (1, 4674), (2, 4674)]
-    >>> clf_smote = LinearSVC().fit(X_resampled, y_resampled)
+    >>> clf_smote = LogisticRegression().fit(X_resampled, y_resampled)
     >>> X_resampled, y_resampled = ADASYN().fit_resample(X, y)
     >>> print(sorted(Counter(y_resampled).items()))
     [(0, 4673), (1, 4662), (2, 4674)]
-    >>> clf_adasyn = LinearSVC().fit(X_resampled, y_resampled)
+    >>> clf_adasyn = LogisticRegression().fit(X_resampled, y_resampled)

 The figure below illustrates the major difference of the different
 over-sampling methods.
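
Note: these doc snippets swap LinearSVC for LogisticRegression as the downstream classifier; any scikit-learn classifier can be trained on the resampled data. A self-contained sketch of the same pattern (synthetic dataset, illustrative parameters):

    from collections import Counter

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    from imblearn.over_sampling import SMOTE

    # Illustrative 90/10 imbalanced dataset.
    X, y = make_classification(n_samples=1_000, weights=[0.9, 0.1], random_state=0)
    X_resampled, y_resampled = SMOTE(random_state=0).fit_resample(X, y)
    print(sorted(Counter(y_resampled).items()))  # both classes now equally sized
    clf = LogisticRegression().fit(X_resampled, y_resampled)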

doc/sphinxext/sphinx_issues.py

Lines changed: 0 additions & 1 deletion
@@ -76,7 +76,6 @@ def cve_role(name, rawtext, text, lineno, inliner, options=None, content=None):


 class IssueRole(object):
-
     EXTERNAL_REPO_REGEX = re.compile(r"^(\w+)/(.+)([#@])([\w]+)$")

     def __init__(

examples/applications/plot_impact_imbalanced_classes.py

Lines changed: 0 additions & 1 deletion
@@ -338,7 +338,6 @@
 # classifier within a :class:`~imblearn.ensemble.BalancedBaggingClassifier`.

 from sklearn.ensemble import HistGradientBoostingClassifier
-from sklearn.experimental import enable_hist_gradient_boosting  # noqa

 from imblearn.ensemble import BalancedBaggingClassifier

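Background for this removal: HistGradientBoostingClassifier graduated from experimental status in scikit-learn 1.0, so the enable_hist_gradient_boosting import is unnecessary on the versions this commit supports (recent scikit-learn releases treat it as a no-op). A minimal sketch:

    # No experimental enable-flag import is needed on scikit-learn >= 1.0.
    from sklearn.datasets import make_classification
    from sklearn.ensemble import HistGradientBoostingClassifier

    X, y = make_classification(random_state=0)  # illustrative toy data
    clf = HistGradientBoostingClassifier(random_state=0).fit(X, y)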

examples/applications/plot_multi_class_under_sampling.py

Lines changed: 1 addition & 3 deletions
@@ -43,9 +43,7 @@
 print(f"Testing target statistics: {Counter(y_test)}")

 # Create a pipeline
-pipeline = make_pipeline(
-    NearMiss(version=2), StandardScaler(), LogisticRegression(random_state=RANDOM_STATE)
-)
+pipeline = make_pipeline(NearMiss(version=2), StandardScaler(), LogisticRegression())
 pipeline.fit(X_train, y_train)

 # Classify and report the results

examples/combine/plot_comparison_combine.py

Lines changed: 2 additions & 2 deletions
@@ -102,7 +102,7 @@ def plot_decision_function(X, y, clf, ax):
 # :class:`~imblearn.combine.SMOTEENN` cleans more noisy data than
 # :class:`~imblearn.combine.SMOTETomek`.

-from sklearn.svm import LinearSVC
+from sklearn.linear_model import LogisticRegression

 from imblearn.combine import SMOTEENN, SMOTETomek

@@ -114,7 +114,7 @@ def plot_decision_function(X, y, clf, ax):

 fig, axs = plt.subplots(3, 2, figsize=(15, 25))
 for ax, sampler in zip(axs, samplers):
-    clf = make_pipeline(sampler, LinearSVC()).fit(X, y)
+    clf = make_pipeline(sampler, LogisticRegression()).fit(X, y)
     plot_decision_function(X, y, clf, ax[0])
     plot_resampling(X, y, sampler, ax[1])
 fig.tight_layout()
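
For context, a hedged sketch of the two combined samplers compared in this example, SMOTE over-sampling followed by Edited-Nearest-Neighbours or Tomek-links cleaning (synthetic data, illustrative parameters):

    from collections import Counter

    from sklearn.datasets import make_classification

    from imblearn.combine import SMOTEENN, SMOTETomek

    X, y = make_classification(n_samples=500, weights=[0.9, 0.1], random_state=0)
    for sampler in (SMOTEENN(random_state=0), SMOTETomek(random_state=0)):
        X_res, y_res = sampler.fit_resample(X, y)
        print(type(sampler).__name__, sorted(Counter(y_res).items()))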

examples/evaluation/plot_classification_report.py

Lines changed: 2 additions & 2 deletions
@@ -14,9 +14,9 @@


 from sklearn import datasets
+from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
-from sklearn.svm import LinearSVC

 from imblearn import over_sampling as os
 from imblearn import pipeline as pl
@@ -43,7 +43,7 @@
 pipeline = pl.make_pipeline(
     StandardScaler(),
     os.SMOTE(random_state=RANDOM_STATE),
-    LinearSVC(max_iter=10_000, random_state=RANDOM_STATE),
+    LogisticRegression(max_iter=10_000),
 )

 # Split the data
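
Note: the example keeps the sampler inside imblearn's pipeline (imported as pl above), which, unlike scikit-learn's Pipeline, accepts samplers as intermediate steps and applies them only during fit. A self-contained sketch of the same pattern (synthetic data, illustrative parameters):

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_score
    from sklearn.preprocessing import StandardScaler

    from imblearn.over_sampling import SMOTE
    from imblearn.pipeline import make_pipeline

    # Resampling happens only on the training folds, never at predict time.
    X, y = make_classification(n_samples=1_000, weights=[0.95, 0.05], random_state=0)
    model = make_pipeline(
        StandardScaler(),
        SMOTE(random_state=0),
        LogisticRegression(max_iter=10_000),
    )
    print(cross_val_score(model, X, y, scoring="balanced_accuracy").mean())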

examples/evaluation/plot_metrics.py

Lines changed: 4 additions & 3 deletions
@@ -48,10 +48,11 @@

 # %% [markdown]
 # We will create a pipeline made of a :class:`~imblearn.over_sampling.SMOTE`
-# over-sampler followed by a :class:`~sklearn.svm.LinearSVC` classifier.
+# over-sampler followed by a :class:`~sklearn.linear_model.LogisticRegression`
+# classifier.

+from sklearn.linear_model import LogisticRegression
 from sklearn.preprocessing import StandardScaler
-from sklearn.svm import LinearSVC

 from imblearn.over_sampling import SMOTE

@@ -61,7 +62,7 @@
 model = make_pipeline(
     StandardScaler(),
     SMOTE(random_state=RANDOM_STATE),
-    LinearSVC(max_iter=10_000, random_state=RANDOM_STATE),
+    LogisticRegression(max_iter=10_000, random_state=RANDOM_STATE),
 )

 # %% [markdown]
