diff --git a/doc/changes/0.5.rst b/doc/changes/0.5.rst index c807b80d..45536a7d 100644 --- a/doc/changes/0.5.rst +++ b/doc/changes/0.5.rst @@ -2,4 +2,7 @@ Version 0.5 (in progress) ------------------------- + +- JIT-compile datafits and penalties inside ``Solver.solve`` method (:gh:`270`) +- Datafits are now initialized inside ``Solver.solve`` method (:gh:`295`) - Add support for fitting an intercept in :ref:`SqrtLasso ` (PR: :gh:`298`) diff --git a/examples/plot_survival_analysis.py b/examples/plot_survival_analysis.py index 93e8c434..5897836c 100644 --- a/examples/plot_survival_analysis.py +++ b/examples/plot_survival_analysis.py @@ -69,9 +69,8 @@ # skglm internals: init datafit and penalty datafit = Cox() -penalty = L1(alpha) - datafit.initialize(X, y) +penalty = L1(alpha) # init solver solver = ProxNewton(fit_intercept=False, max_iter=50) diff --git a/skglm/estimators.py b/skglm/estimators.py index 4d7ddd92..22a2159c 100644 --- a/skglm/estimators.py +++ b/skglm/estimators.py @@ -102,11 +102,6 @@ def _glm_fit(X, y, model, datafit, penalty, solver): n_samples, n_features = X_.shape - if issparse(X): - datafit.initialize_sparse(X_.data, X_.indptr, X_.indices, y) - else: - datafit.initialize(X_, y) - # if model.warm_start and hasattr(model, 'coef_') and model.coef_ is not None: if solver.warm_start and hasattr(model, 'coef_') and model.coef_ is not None: if isinstance(datafit, QuadraticSVC): @@ -1374,12 +1369,6 @@ def fit(self, X, y): fit_intercept=False, ) - # solve problem - if not issparse(X): - datafit.initialize(X, y) - else: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) - w, _, stop_crit = solver.solve(X, y, datafit, penalty) # save to attribute diff --git a/skglm/solvers/anderson_cd.py b/skglm/solvers/anderson_cd.py index d39a2408..b8bf90bc 100644 --- a/skglm/solvers/anderson_cd.py +++ b/skglm/solvers/anderson_cd.py @@ -80,10 +80,8 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): is_sparse = sparse.issparse(X) if 
is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) lipschitz = datafit.get_lipschitz_sparse(X.data, X.indptr, X.indices, y) else: - datafit.initialize(X, y) lipschitz = datafit.get_lipschitz(X, y) if len(w) != n_features + self.fit_intercept: diff --git a/skglm/solvers/base.py b/skglm/solvers/base.py index a550eaa7..e91289fc 100644 --- a/skglm/solvers/base.py +++ b/skglm/solvers/base.py @@ -2,6 +2,7 @@ from abc import abstractmethod, ABC import numpy as np +from scipy.sparse import issparse from skglm.utils.validation import check_attrs from skglm.utils.jit_compilation import compiled_clone @@ -40,6 +41,8 @@ class BaseSolver(ABC): def _solve(self, X, y, datafit, penalty, w_init, Xw_init): """Solve an optimization problem. + This method assumes that datafit was already initialized. + Parameters ---------- X : array, shape (n_samples, n_features) @@ -95,7 +98,8 @@ def custom_checks(self, X, y, datafit, penalty): pass def solve( - self, X, y, datafit, penalty, w_init=None, Xw_init=None, *, run_checks=True + self, X, y, datafit, penalty, w_init=None, Xw_init=None, *, + run_checks=True, initialize_datafit=True ): """Solve the optimization problem after validating its compatibility. 
@@ -133,6 +137,13 @@ def solve( if run_checks: self._validate(X, y, datafit, penalty) + # check for None as `GramCD` solver takes `None` as datafit + if datafit is not None and initialize_datafit: + if issparse(X): + datafit.initialize_sparse(X.data, X.indptr, X.indices, y) + else: + datafit.initialize(X, y) + return self._solve(X, y, datafit, penalty, w_init, Xw_init) def _validate(self, X, y, datafit, penalty): diff --git a/skglm/solvers/fista.py b/skglm/solvers/fista.py index ccd35db8..e0933a11 100644 --- a/skglm/solvers/fista.py +++ b/skglm/solvers/fista.py @@ -51,12 +51,10 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): Xw = Xw_init.copy() if Xw_init is not None else np.zeros(n_samples) if X_is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) lipschitz = datafit.get_global_lipschitz_sparse( X.data, X.indptr, X.indices, y ) else: - datafit.initialize(X, y) lipschitz = datafit.get_global_lipschitz(X, y) for n_iter in range(self.max_iter): diff --git a/skglm/solvers/group_bcd.py b/skglm/solvers/group_bcd.py index c7b515da..955cdf22 100644 --- a/skglm/solvers/group_bcd.py +++ b/skglm/solvers/group_bcd.py @@ -76,10 +76,8 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): is_sparse = issparse(X) if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) lipschitz = datafit.get_lipschitz_sparse(X.data, X.indptr, X.indices, y) else: - datafit.initialize(X, y) lipschitz = datafit.get_lipschitz(X, y) all_groups = np.arange(n_groups) diff --git a/skglm/solvers/group_prox_newton.py b/skglm/solvers/group_prox_newton.py index d717e8fb..1492651c 100644 --- a/skglm/solvers/group_prox_newton.py +++ b/skglm/solvers/group_prox_newton.py @@ -69,13 +69,6 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): stop_crit = 0. 
p_objs_out = [] - # TODO: to be isolated in a seperated method - is_sparse = issparse(X) - if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) - else: - datafit.initialize(X, y) - for iter in range(self.max_iter): grad = _construct_grad(X, y, w, Xw, datafit, all_groups) diff --git a/skglm/solvers/lbfgs.py b/skglm/solvers/lbfgs.py index 854be64e..5d051f4b 100644 --- a/skglm/solvers/lbfgs.py +++ b/skglm/solvers/lbfgs.py @@ -38,13 +38,6 @@ def __init__(self, max_iter=50, tol=1e-4, verbose=False): def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): - # TODO: to be isolated in a seperated method - is_sparse = issparse(X) - if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) - else: - datafit.initialize(X, y) - def objective(w): Xw = X @ w datafit_value = datafit.value(y, w, Xw) diff --git a/skglm/solvers/multitask_bcd.py b/skglm/solvers/multitask_bcd.py index 5a8dfa5e..7c231b80 100644 --- a/skglm/solvers/multitask_bcd.py +++ b/skglm/solvers/multitask_bcd.py @@ -54,10 +54,8 @@ def _solve(self, X, Y, datafit, penalty, W_init=None, XW_init=None): is_sparse = sparse.issparse(X) if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, Y) lipschitz = datafit.get_lipschitz_sparse(X.data, X.indptr, X.indices, Y) else: - datafit.initialize(X, Y) lipschitz = datafit.get_lipschitz(X, Y) for t in range(self.max_iter): diff --git a/skglm/solvers/prox_newton.py b/skglm/solvers/prox_newton.py index baf05523..76867c7d 100644 --- a/skglm/solvers/prox_newton.py +++ b/skglm/solvers/prox_newton.py @@ -85,12 +85,6 @@ def _solve(self, X, y, datafit, penalty, w_init=None, Xw_init=None): if is_sparse: X_bundles = (X.data, X.indptr, X.indices) - # TODO: to be isolated in a seperated method - if is_sparse: - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) - else: - datafit.initialize(X, y) - if self.ws_strategy == "fixpoint": X_square = X.multiply(X) if is_sparse else X ** 2 diff --git 
a/skglm/tests/test_estimators.py b/skglm/tests/test_estimators.py index 954ca225..f0df20a8 100644 --- a/skglm/tests/test_estimators.py +++ b/skglm/tests/test_estimators.py @@ -207,6 +207,8 @@ def test_CoxEstimator(use_efron, use_float_32): datafit = Cox(use_efron) penalty = L1(alpha) + # XXX: initialize is needed here although it is done in ProxNewton + # it is used to evaluate the objective datafit.initialize(X, y) w, *_ = ProxNewton( @@ -256,8 +258,6 @@ def test_CoxEstimator_sparse(use_efron, use_float_32): datafit = Cox(use_efron) penalty = L1(alpha) - datafit.initialize_sparse(X.data, X.indptr, X.indices, y) - *_, stop_crit = ProxNewton( fit_intercept=False, tol=1e-6, max_iter=50 ).solve( diff --git a/skglm/tests/test_lbfgs_solver.py b/skglm/tests/test_lbfgs_solver.py index 878e8c7d..6c4d51f1 100644 --- a/skglm/tests/test_lbfgs_solver.py +++ b/skglm/tests/test_lbfgs_solver.py @@ -62,7 +62,7 @@ def test_L2_Cox(use_efron): penalty = L2(alpha) # XXX: intialize is needed here although it is done in LBFGS - # is used to evaluate the objective + # it is used to evaluate the objective datafit.initialize(X, y) w, *_ = LBFGS().solve(X, y, datafit, penalty)