From cc690bd0fab5c3411e0cf5d007ed6606a0cba24c Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sat, 19 Apr 2025 21:00:43 +0200 Subject: [PATCH 1/8] init inside `solve` method --- skglm/solvers/base.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/skglm/solvers/base.py b/skglm/solvers/base.py index a550eaa7..1ac70bbf 100644 --- a/skglm/solvers/base.py +++ b/skglm/solvers/base.py @@ -2,6 +2,7 @@ from abc import abstractmethod, ABC import numpy as np +from scipy.sparse import issparse from skglm.utils.validation import check_attrs from skglm.utils.jit_compilation import compiled_clone @@ -40,6 +41,8 @@ class BaseSolver(ABC): def _solve(self, X, y, datafit, penalty, w_init, Xw_init): """Solve an optimization problem. + This method assumes that datafit was already initialized. + Parameters ---------- X : array, shape (n_samples, n_features) @@ -95,7 +98,8 @@ def custom_checks(self, X, y, datafit, penalty): pass def solve( - self, X, y, datafit, penalty, w_init=None, Xw_init=None, *, run_checks=True + self, X, y, datafit, penalty, w_init=None, Xw_init=None, *, + run_checks=True, initialize_datafit=True ): """Solve the optimization problem after validating its compatibility. @@ -133,6 +137,13 @@ def solve( if run_checks: self._validate(X, y, datafit, penalty) + # check for None as GramCD solver doesn't take None as datafit + if datafit is not None and initialize_datafit: + if issparse(X): + datafit.initialize_sparse(X.data, X.indptr, X.indices, y) + else: + datafit.initialize(X, y) + return self._solve(X, y, datafit, penalty, w_init, Xw_init) def _validate(self, X, y, datafit, penalty): From 3453825d46e993bd5cd22f20da23b0fcc94a4556 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sat, 19 Apr 2025 21:00:59 +0200 Subject: [PATCH 2/8] rm init inside solvers --- skglm/solvers/anderson_cd.py | 2 -- skglm/solvers/fista.py | 2 -- skglm/solvers/group_bcd.py | 2 -- skglm/solvers/group_prox_newton.py | 7 ------- skglm/solvers/lbfgs.py | 7 ------- skglm/solvers/multitask_bcd.py | 2 -- skglm/solvers/prox_newton.py | 6 ------ 7 files changed, 28 deletions(-) diff --git a/skglm/solvers/anderson_cd.py b/skglm/solvers/anderson_cd.py index d39a2408..b8bf90bc 100644 --- a/skglm/solvers/anderson_cd.py +++ b/skglm/solvers/anderson_cd.py @@ -80,10 +80,8 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): is_sparse = sparse.issparse(X) if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) lipschitz = datafit.get_lipschitz_sparse(X.data, X.indptr, X.indices, y) else: - datafit.initialize(X, y) lipschitz = datafit.get_lipschitz(X, y) if len(w) != n_features + self.fit_intercept: diff --git a/skglm/solvers/fista.py b/skglm/solvers/fista.py index ccd35db8..e0933a11 100644 --- a/skglm/solvers/fista.py +++ b/skglm/solvers/fista.py @@ -51,12 +51,10 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): Xw = Xw_init.copy() if Xw_init is not None else np.zeros(n_samples) if X_is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) lipschitz = datafit.get_global_lipschitz_sparse( X.data, X.indptr, X.indices, y ) else: - datafit.initialize(X, y) lipschitz = datafit.get_global_lipschitz(X, y) for n_iter in range(self.max_iter): diff --git a/skglm/solvers/group_bcd.py b/skglm/solvers/group_bcd.py index c7b515da..955cdf22 100644 --- a/skglm/solvers/group_bcd.py +++ b/skglm/solvers/group_bcd.py @@ -76,10 +76,8 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): is_sparse = issparse(X) if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) lipschitz = datafit.get_lipschitz_sparse(X.data, X.indptr, X.indices, y) else: - datafit.initialize(X, y) lipschitz = datafit.get_lipschitz(X, y) all_groups = np.arange(n_groups) diff --git a/skglm/solvers/group_prox_newton.py b/skglm/solvers/group_prox_newton.py index d717e8fb..1492651c 100644 --- a/skglm/solvers/group_prox_newton.py +++ b/skglm/solvers/group_prox_newton.py @@ -69,13 +69,6 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): stop_crit = 0. p_objs_out = [] - # TODO: to be isolated in a seperated method - is_sparse = issparse(X) - if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) - else: - datafit.initialize(X, y) - for iter in range(self.max_iter): grad = _construct_grad(X, y, w, Xw, datafit, all_groups) diff --git a/skglm/solvers/lbfgs.py b/skglm/solvers/lbfgs.py index 854be64e..5d051f4b 100644 --- a/skglm/solvers/lbfgs.py +++ b/skglm/solvers/lbfgs.py @@ -38,13 +38,6 @@ def __init__(self, max_iter=50, tol=1e-4, verbose=False): def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): - # TODO: to be isolated in a seperated method - is_sparse = issparse(X) - if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) - else: - datafit.initialize(X, y) - def objective(w): Xw = X @ w datafit_value = datafit.value(y, w, Xw) diff --git a/skglm/solvers/multitask_bcd.py b/skglm/solvers/multitask_bcd.py index 5a8dfa5e..7c231b80 100644 --- a/skglm/solvers/multitask_bcd.py +++ b/skglm/solvers/multitask_bcd.py @@ -54,10 +54,8 @@ def _solve(self, X, Y, datafit, penalty, W_init=None, XW_init=None): is_sparse = sparse.issparse(X) if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, Y) lipschitz = datafit.get_lipschitz_sparse(X.data, X.indptr, X.indices, Y) else: - datafit.initialize(X, Y) lipschitz = datafit.get_lipschitz(X, Y) for t in range(self.max_iter): diff --git a/skglm/solvers/prox_newton.py b/skglm/solvers/prox_newton.py index baf05523..76867c7d 100644 --- a/skglm/solvers/prox_newton.py +++ b/skglm/solvers/prox_newton.py @@ -85,12 +85,6 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): if is_sparse: X_bundles = (X.data, X.indptr, X.indices) - # TODO: to be isolated in a seperated method - if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) - else: - datafit.initialize(X, y) - if self.ws_strategy == "fixpoint": X_square = X.multiply(X) if is_sparse else X ** 2 From 68eb0a8e10a27955cfea20d71ac314d0a7696a26 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sat, 19 Apr 2025 21:01:15 +0200 Subject: [PATCH 3/8] rm init inside estimators --- skglm/estimators.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/skglm/estimators.py b/skglm/estimators.py index c161f532..79782a06 100644 --- a/skglm/estimators.py +++ b/skglm/estimators.py @@ -101,10 +101,10 @@ def _glm_fit(X, y, model, datafit, penalty, solver): n_samples, n_features = X_.shape - if issparse(X): - datafit.initialize_sparse(X_.data, X_.indptr, X_.indices, y) - else: - datafit.initialize(X_, y) + # if issparse(X): + # datafit.initialize_sparse(X_.data, X_.indptr, X_.indices, y) + # else: + # datafit.initialize(X_, y) # if model.warm_start and hasattr(model, 'coef_') and model.coef_ is not None: if solver.warm_start and hasattr(model, 'coef_') and model.coef_ is not None: @@ -1373,11 +1373,11 @@ def fit(self, X, y): fit_intercept=False, ) - # solve problem - if not issparse(X): - datafit.initialize(X, y) - else: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) + # # solve problem + # if not issparse(X): + # datafit.initialize(X, y) + # else: + # datafit.initialize_sparse(X.data, X.indptr, X.indices, y) w, _, stop_crit = solver.solve(X, y, datafit, penalty) From 2f7fa7571c53bd6e5b742854d6cea812455c3752 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sat, 19 Apr 2025 21:08:26 +0200 Subject: [PATCH 4/8] stale commit --- skglm/estimators.py | 11 ----------- skglm/solvers/base.py | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/skglm/estimators.py b/skglm/estimators.py index 79782a06..8fe8e8e9 100644 --- a/skglm/estimators.py +++ b/skglm/estimators.py @@ -101,11 +101,6 @@ def _glm_fit(X, y, model, datafit, penalty, solver): n_samples, n_features = X_.shape - # if issparse(X): - # datafit.initialize_sparse(X_.data, X_.indptr, X_.indices, y) - # else: - # datafit.initialize(X_, y) - # if model.warm_start and hasattr(model, 'coef_') and model.coef_ is not None: if solver.warm_start and hasattr(model, 'coef_') and model.coef_ is not None: if isinstance(datafit, QuadraticSVC): @@ -1373,12 +1368,6 @@ def fit(self, X, y): fit_intercept=False, ) - # # solve problem - # if not issparse(X): - # datafit.initialize(X, y) - # else: - # datafit.initialize_sparse(X.data, X.indptr, X.indices, y) - w, _, stop_crit = solver.solve(X, y, datafit, penalty) # save to attribute diff --git a/skglm/solvers/base.py b/skglm/solvers/base.py index 1ac70bbf..e91289fc 100644 --- a/skglm/solvers/base.py +++ b/skglm/solvers/base.py @@ -137,7 +137,7 @@ def solve( if run_checks: self._validate(X, y, datafit, penalty) - # check for None as GramCD solver doesn't take None as datafit + # check for None as `GramCD` solver take `None` as datafit if datafit is not None and initialize_datafit: if issparse(X): datafit.initialize_sparse(X.data, X.indptr, X.indices, y) From 40f91c5f9b2deae28b70f3919b5c2ec1af6192e1 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sat, 19 Apr 2025 21:11:45 +0200 Subject: [PATCH 5/8] update what's new --- doc/changes/0.5.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/changes/0.5.rst b/doc/changes/0.5.rst index cf65cee7..f55cb139 100644 --- a/doc/changes/0.5.rst +++ b/doc/changes/0.5.rst @@ -2,3 +2,6 @@ Version 0.5 (in progress) ------------------------- + +- jit-compile datafits and penalties inside ``Solver.solve`` method (:gh:`270`) +- Datafits are now initialized inside ``Solver.solve`` method (:gh:`295`) From 680e01b2d09ed92dd1d744b13118af48f0a33164 Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sat, 19 Apr 2025 23:35:18 +0200 Subject: [PATCH 6/8] pass examples --- examples/plot_survival_analysis.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/plot_survival_analysis.py b/examples/plot_survival_analysis.py index 93e8c434..d6836e67 100644 --- a/examples/plot_survival_analysis.py +++ b/examples/plot_survival_analysis.py @@ -71,8 +71,6 @@ datafit = Cox() penalty = L1(alpha) -datafit.initialize(X, y) - # init solver solver = ProxNewton(fit_intercept=False, max_iter=50) @@ -230,7 +228,6 @@ # ensure using Efron estimate datafit = Cox(use_efron=True) -datafit.initialize(X, y) # solve the problem solver = ProxNewton(fit_intercept=False, max_iter=50) From 3be2424dd21ca109fde125067ad127a5761313dc Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Sun, 20 Apr 2025 00:17:35 +0200 Subject: [PATCH 7/8] pass test files --- skglm/tests/test_estimators.py | 4 ++-- skglm/tests/test_lbfgs_solver.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/skglm/tests/test_estimators.py b/skglm/tests/test_estimators.py index 954ca225..f0df20a8 100644 --- a/skglm/tests/test_estimators.py +++ b/skglm/tests/test_estimators.py @@ -207,6 +207,8 @@ def test_CoxEstimator(use_efron, use_float_32): datafit = Cox(use_efron) penalty = L1(alpha) + # XXX: intialize is needed here although it is done in ProxNewton + # it is used to evaluate the objective datafit.initialize(X, y) w, *_ = ProxNewton( @@ -256,8 +258,6 @@ def test_CoxEstimator_sparse(use_efron, use_float_32): datafit = Cox(use_efron) penalty = L1(alpha) - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) - *_, stop_crit = ProxNewton( fit_intercept=False, tol=1e-6, max_iter=50 ).solve( diff --git a/skglm/tests/test_lbfgs_solver.py b/skglm/tests/test_lbfgs_solver.py index 878e8c7d..6c4d51f1 100644 --- a/skglm/tests/test_lbfgs_solver.py +++ b/skglm/tests/test_lbfgs_solver.py @@ -62,7 +62,7 @@ def test_L2_Cox(use_efron): penalty = L2(alpha) # XXX: intialize is needed here although it is done in LBFGS - # is used to evaluate the objective + # it is used to evaluate the objective datafit.initialize(X, y) w, *_ = LBFGS().solve(X, y, datafit, penalty) From 55ed62a8f09430bc8bfd20a522c3bf2d20ff1d8d Mon Sep 17 00:00:00 2001 From: Badr-MOUFAD Date: Mon, 21 Apr 2025 22:15:23 +0200 Subject: [PATCH 8/8] fix plot doc --- examples/plot_survival_analysis.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/plot_survival_analysis.py b/examples/plot_survival_analysis.py index d6836e67..5897836c 100644 --- a/examples/plot_survival_analysis.py +++ b/examples/plot_survival_analysis.py @@ -69,6 +69,7 @@ # skglm internals: init datafit and penalty datafit = Cox() +datafit.initialize(X, y) penalty = L1(alpha) # init solver @@ -228,6 +229,7 @@ # ensure using Efron estimate datafit = Cox(use_efron=True) +datafit.initialize(X, y) # solve the problem solver = ProxNewton(fit_intercept=False, max_iter=50)