From 21f14599eb46305adefcd501b1eae38b85bcfe30 Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Fri, 23 May 2025 14:30:49 +0200 Subject: [PATCH 01/16] first try at simple quantile huber --- examples/plot_smooth_quantile.py | 75 +++++++++++ skglm/experimental/__init__.py | 4 + skglm/experimental/quantile_huber.py | 124 ++++++++++++++++++ .../experimental/smooth_quantile_regressor.py | 75 +++++++++++ 4 files changed, 278 insertions(+) create mode 100644 examples/plot_smooth_quantile.py create mode 100644 skglm/experimental/quantile_huber.py create mode 100644 skglm/experimental/smooth_quantile_regressor.py diff --git a/examples/plot_smooth_quantile.py b/examples/plot_smooth_quantile.py new file mode 100644 index 000000000..9232c815f --- /dev/null +++ b/examples/plot_smooth_quantile.py @@ -0,0 +1,75 @@ +""" +=========================================== +Smooth Quantile Regression Example +=========================================== + +""" + +import numpy as np +import matplotlib.pyplot as plt +import time +from sklearn.datasets import make_regression +from sklearn.preprocessing import StandardScaler +from sklearn.linear_model import QuantileRegressor +from skglm.experimental.smooth_quantile_regressor import SmoothQuantileRegressor +from skglm.experimental.quantile_huber import QuantileHuber + +X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42) +X = StandardScaler().fit_transform(X) +tau = 0.75 + +t0 = time.time() +reg_skglm = SmoothQuantileRegressor(quantile=tau).fit(X, y) +t1 = time.time() +reg_sklearn = QuantileRegressor(quantile=tau, alpha=0.1, solver='highs').fit(X, y) +t2 = time.time() + +y_pred_skglm, y_pred_sklearn = reg_skglm.predict(X), reg_sklearn.predict(X) +coverage_skglm = np.mean(y <= y_pred_skglm) +coverage_sklearn = np.mean(y <= y_pred_sklearn) + +print(f"\nTiming: skglm={t1-t0:.3f}s, sklearn={t2-t1:.3f}s, " + f"speedup={(t2-t1)/(t1-t0):.1f}x") +print(f"Coverage (target {tau}): skglm={coverage_skglm:.3f}, " + f"sklearn={coverage_sklearn:.3f}") +print(f"Non-zero coefs: skglm={np.sum(reg_skglm.coef_ != 0)}, " + f"sklearn={np.sum(reg_sklearn.coef_ != 0)}") + + +# Visualizations +def pinball(y_true, y_pred): + diff = y_true - y_pred + return np.mean(np.where(diff >= 0, tau * diff, (1 - tau) * -diff)) + + +print(f"Pinball loss: skglm={pinball(y, y_pred_skglm):.4f}, " + f"sklearn={pinball(y, y_pred_sklearn):.4f}") + +plt.figure(figsize=(12, 5)) +plt.subplot(121) +residuals = np.linspace(-2, 2, 1000) +for delta in [1.0, 0.5, 0.1]: + loss = QuantileHuber(quantile=tau, delta=delta) + losses = [loss.value(np.array([r]), np.array([[1]]), np.array([0])) + for r in residuals] + plt.plot(residuals, losses, label=f'δ={delta}') +plt.plot(residuals, [tau * max(r, 0) + (1 - tau) * max(-r, 0) + for r in residuals], 'k--', label='Pinball') +plt.axvline(x=0, color='k', linestyle='--', alpha=0.3) +plt.xlabel('Residual (y - y_pred)') +plt.ylabel('Loss') +plt.title('Quantile Huber Loss (τ=0.75)') +plt.legend() +plt.grid(True, alpha=0.3) + +plt.subplot(122) +plt.hist(y - y_pred_skglm, bins=50, alpha=0.5, label='skglm') +plt.hist(y - y_pred_sklearn, bins=50, alpha=0.5, label='sklearn') +plt.axvline(0, color='k', linestyle='--') +plt.xlabel('Residual (y - y_pred)') +plt.ylabel('Count') +plt.title('Residuals Histogram') +plt.legend() +plt.grid(True, alpha=0.3) +plt.tight_layout() +plt.show() diff --git a/skglm/experimental/__init__.py b/skglm/experimental/__init__.py index eecf75b77..75ecb3dc7 100644 --- a/skglm/experimental/__init__.py +++ 
b/skglm/experimental/__init__.py @@ -2,6 +2,8 @@ from .sqrt_lasso import SqrtLasso, SqrtQuadratic from .pdcd_ws import PDCD_WS from .quantile_regression import Pinball +from .quantile_huber import QuantileHuber +from .smooth_quantile_regressor import SmoothQuantileRegressor __all__ = [ IterativeReweightedL1, @@ -9,4 +11,6 @@ Pinball, SqrtQuadratic, SqrtLasso, + QuantileHuber, + SmoothQuantileRegressor, ] diff --git a/skglm/experimental/quantile_huber.py b/skglm/experimental/quantile_huber.py new file mode 100644 index 000000000..9435d1d50 --- /dev/null +++ b/skglm/experimental/quantile_huber.py @@ -0,0 +1,124 @@ +import numpy as np +from numba import float64 +from skglm.datafits.single_task import Huber +from skglm.utils.sparse_ops import spectral_norm + + +class QuantileHuber(Huber): + r"""Quantile Huber loss for quantile regression. + + Implements the smoothed pinball loss with quadratic region: + + .. math:: + + \rho_\tau^\delta(r) = + \begin{cases} + \tau\left(r - \dfrac{\delta}{2}\right), & \text{if } r \ge \delta,\\ + \dfrac{\tau r^{2}}{2\delta}, & \text{if } 0 \le r < \delta,\\ + \dfrac{(1-\tau) r^{2}}{2\delta}, & \text{if } -\delta < r < 0,\\ + (1-\tau)\left(-r - \dfrac{\delta}{2}\right), & \text{if } r \le -\delta. + \end{cases} + + Parameters + ---------- + quantile : float, default=0.5 + Desired quantile level between 0 and 1. + delta : float, default=1.0 + Width of quadratic region. + + References + ---------- + Chen, C. (2007). A Finite Smoothing Algorithm for Quantile Regression. + Journal of Computational and Graphical Statistics, 16(1), 136–164. + http://www.jstor.org/stable/27594233 + """ + + def __init__(self, quantile=0.5, delta=1.0): + if not 0 < quantile < 1: + raise ValueError("quantile must be between 0 and 1") + self.delta = float(delta) + self.quantile = float(quantile) + + def get_spec(self): + return (('delta', float64), ('quantile', float64)) + + def params_to_dict(self): + return dict(delta=self.delta, quantile=self.quantile) + + def _loss_and_grad_scalar(self, residual): + """Calculate loss and gradient for a single residual.""" + tau = self.quantile + delta = self.delta + abs_r = abs(residual) + + # Quadratic core: |r| ≤ delta + if abs_r <= delta: + if residual >= 0: + # 0 ≤ r ≤ delta + loss = tau * residual**2 / (2 * delta) + grad = tau * residual / delta + else: + # -delta ≤ r < 0 + loss = (1 - tau) * residual**2 / (2 * delta) + grad = (1 - tau) * residual / delta + return loss, grad + + # Linear tails: |r| > delta + if residual > delta: + loss = tau * (residual - delta / 2) + grad = tau + return loss, grad + else: + loss = (1 - tau) * (-residual - delta / 2) + grad = tau - 1 + return loss, grad + + def value(self, y, w, Xw): + """Compute the quantile Huber loss value.""" + residuals = y - Xw + loss = np.zeros_like(residuals) + for i, r in enumerate(residuals): + loss[i], _ = self._loss_and_grad_scalar(r) + return np.mean(loss) + + def raw_grad(self, y, Xw): + """Compute gradient of datafit w.r.t Xw.""" + residuals = y - Xw + grad = np.zeros_like(residuals) + for i, r in enumerate(residuals): + _, grad[i] = self._loss_and_grad_scalar(r) + return -grad + + def get_lipschitz(self, X, y): + """Compute coordinate-wise Lipschitz constants.""" + weight = max(self.quantile, 1 - self.quantile) + return weight * (X ** 2).sum(axis=0) / (len(y) * self.delta) + + def get_global_lipschitz(self, X, y): + """Compute global Lipschitz constant.""" + weight = max(self.quantile, 1 - self.quantile) + return weight * np.linalg.norm(X, 2) ** 2 / (len(y) * self.delta) + + def 
get_lipschitz_sparse(self, X_data, X_indptr, X_indices, y): + """Compute coordinate-wise Lipschitz constants for sparse X.""" + n_samples = len(y) + weight = max(self.quantile, 1 - self.quantile) + n_features = len(X_indptr) - 1 + lipschitz = np.zeros(n_features, dtype=X_data.dtype) + for j in range(n_features): + nrm2 = 0.0 + for idx in range(X_indptr[j], X_indptr[j + 1]): + nrm2 += X_data[idx] ** 2 + lipschitz[j] = weight * nrm2 / (n_samples * self.delta) + return lipschitz + + def get_global_lipschitz_sparse(self, X_data, X_indptr, X_indices, y): + """Compute global Lipschitz constant for sparse X.""" + n_samples = len(y) + weight = max(self.quantile, 1 - self.quantile) + return weight * spectral_norm( + X_data, X_indptr, X_indices, n_samples + ) ** 2 / (n_samples * self.delta) + + def intercept_update_step(self, y, Xw): + return -np.mean(self.raw_grad(y, Xw)) diff --git a/skglm/experimental/smooth_quantile_regressor.py b/skglm/experimental/smooth_quantile_regressor.py new file mode 100644 index 000000000..73c9e8147 --- /dev/null +++ b/skglm/experimental/smooth_quantile_regressor.py @@ -0,0 +1,75 @@ +import numpy as np +from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.utils.validation import check_X_y, check_array +from ..solvers import FISTA +from ..penalties import L1 +from ..estimators import GeneralizedLinearEstimator +from .quantile_huber import QuantileHuber + + +class SmoothQuantileRegressor(BaseEstimator, RegressorMixin): + """Quantile regression with progressive smoothing using Huberized loss.""" + + def __init__(self, quantile=0.75, alpha=1e-8, max_iter=1000, tol=1e-6, + delta_init=1.0, delta_final=1e-4, n_deltas=10, fit_intercept=True): + self.quantile = quantile + self.alpha = alpha + self.max_iter = max_iter + self.tol = tol + self.delta_init = delta_init + self.delta_final = delta_final + self.n_deltas = n_deltas + self.fit_intercept = fit_intercept + self.intercept_ = 0.0 + + def fit(self, X, y): + """Fit using FISTA with decreasing smoothing parameter delta. 
+ + For each delta level: + - Update coefficients using FISTA + - Update intercept using gradient step + """ + X, y = check_X_y(X, y) + w = np.zeros(X.shape[1]) + intercept = np.quantile(y, self.quantile) if self.fit_intercept else 0.0 + + for delta in np.geomspace(self.delta_init, self.delta_final, self.n_deltas): + datafit = QuantileHuber(quantile=self.quantile, delta=delta) + est = GeneralizedLinearEstimator( + datafit=datafit, + penalty=L1(alpha=self.alpha), + solver=FISTA(max_iter=self.max_iter, tol=self.tol) + ) + est.coef_ = w + est.fit(X, y) + w = est.coef_ + + if self.fit_intercept: + pred = X @ w + intercept + lipschitz = datafit.get_global_lipschitz(X, y) + grad = np.mean(datafit.raw_grad(y, pred)) + intercept -= grad / lipschitz + + # Debug prints + residuals = y - X.dot(w) - intercept + obj_value = datafit.value(residuals, None, residuals) + \ + self.alpha * np.sum(np.abs(w)) + print(f"Delta: {delta:.6f}, Objective: {obj_value:.4f}, " + f"Intercept: {intercept:.4f}, " + f"Non-zero coefs: {np.sum(np.abs(w) > 1e-6)}, " + f"Lipschitz: {lipschitz:.4f}") + print(f"Residual stats - mean: {np.mean(residuals):.4f}, " + f"std: {np.std(residuals):.4f}, " + f"min: {np.min(residuals):.4f}, " + f"max: {np.max(residuals):.4f}") + + coverage = np.mean(y <= X.dot(w) + intercept) + print(f"Coverage: {coverage:.4f} (target: {self.quantile:.4f})") + + self.coef_, self.intercept_ = w, intercept + return self + + def predict(self, X): + """Predict using the fitted model.""" + check_array(X) + return X @ self.coef_ + self.intercept_ From 010c399cb55f31accd83d3dfce5679cd1cbc8b72 Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Mon, 26 May 2025 16:21:59 +0200 Subject: [PATCH 02/16] make basic version without intercept handling and progressive smoothing --- examples/plot_smooth_quantile.py | 120 ++++++------ skglm/experimental/__init__.py | 5 +- skglm/experimental/quantile_huber.py | 172 ++++++++++-------- .../experimental/smooth_quantile_regressor.py | 75 -------- 4 files changed, 154 insertions(+), 218 deletions(-) delete mode 100644 skglm/experimental/smooth_quantile_regressor.py diff --git a/examples/plot_smooth_quantile.py b/examples/plot_smooth_quantile.py index 9232c815f..7ad0b0db2 100644 --- a/examples/plot_smooth_quantile.py +++ b/examples/plot_smooth_quantile.py @@ -1,75 +1,75 @@ """ -=========================================== -Smooth Quantile Regression Example -=========================================== - +QuantileHuber vs Sklearn """ - import numpy as np -import matplotlib.pyplot as plt import time -from sklearn.datasets import make_regression -from sklearn.preprocessing import StandardScaler from sklearn.linear_model import QuantileRegressor -from skglm.experimental.smooth_quantile_regressor import SmoothQuantileRegressor -from skglm.experimental.quantile_huber import QuantileHuber +from skglm.experimental.quantile_huber import QuantileHuber, SimpleQuantileRegressor +import matplotlib.pyplot as plt +from sklearn.datasets import make_regression + +# TODO: no smoothing and no intercept handling yet + + +def pinball_loss(residuals, quantile): + """True pinball loss.""" + return np.mean(residuals * (quantile - (residuals < 0))) -X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42) -X = StandardScaler().fit_transform(X) -tau = 0.75 -t0 = time.time() -reg_skglm = SmoothQuantileRegressor(quantile=tau).fit(X, y) -t1 = time.time() -reg_sklearn = QuantileRegressor(quantile=tau, alpha=0.1, solver='highs').fit(X, y) -t2 = time.time() +def 
create_data(n_samples=1000, n_features=10, noise=0.1): + X, y = make_regression(n_samples=n_samples, n_features=n_features, noise=noise) + return X, y -y_pred_skglm, y_pred_sklearn = reg_skglm.predict(X), reg_sklearn.predict(X) -coverage_skglm = np.mean(y <= y_pred_skglm) -coverage_sklearn = np.mean(y <= y_pred_sklearn) -print(f"\nTiming: skglm={t1-t0:.3f}s, sklearn={t2-t1:.3f}s, " - f"speedup={(t2-t1)/(t1-t0):.1f}x") -print(f"Coverage (target {tau}): skglm={coverage_skglm:.3f}, " - f"sklearn={coverage_sklearn:.3f}") -print(f"Non-zero coefs: skglm={np.sum(reg_skglm.coef_ != 0)}, " - f"sklearn={np.sum(reg_sklearn.coef_ != 0)}") +def plot_quantile_huber(): + quantiles = [0.1, 0.5, 0.9] + delta = 0.5 + residuals = np.linspace(-3, 3, 500) + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) + for tau in quantiles: + qh = QuantileHuber(quantile=tau, delta=delta) + loss = [qh._loss_scalar(r) for r in residuals] + grad = [qh._grad_scalar(r) for r in residuals] + ax1.plot(residuals, loss, label=f"τ={tau}") + ax2.plot(residuals, grad, label=f"τ={tau}") + ax1.set_title("QuantileHuber Loss") + ax1.set_xlabel("Residual") + ax1.set_ylabel("Loss") + ax1.legend() + ax2.set_title("QuantileHuber Gradient") + ax2.set_xlabel("Residual") + ax2.set_ylabel("Gradient") + ax2.legend() + plt.tight_layout() + plt.show() -# Visualizations -def pinball(y_true, y_pred): - diff = y_true - y_pred - return np.mean(np.where(diff >= 0, tau * diff, (1 - tau) * -diff)) +if __name__ == "__main__": + X, y = create_data() + tau = 0.8 + start = time.time() + sk = QuantileRegressor(quantile=tau, alpha=0.001, fit_intercept=False) + sk.fit(X, y) + sk_time = time.time() - start + sk_pred = sk.predict(X) + sk_cov = np.mean(y <= sk_pred) + sk_pinball = pinball_loss(y - sk_pred, tau) -print(f"Pinball loss: skglm={pinball(y, y_pred_skglm):.4f}, " - f"sklearn={pinball(y, y_pred_sklearn):.4f}") + start = time.time() + qh = SimpleQuantileRegressor(quantile=tau, alpha=0.001, delta=0.05) + qh.fit(X, y) + qh_time = time.time() - start + qh_pred = qh.predict(X) + qh_cov = np.mean(y <= qh_pred) + qh_pinball = pinball_loss(y - qh_pred, tau) -plt.figure(figsize=(12, 5)) -plt.subplot(121) -residuals = np.linspace(-2, 2, 1000) -for delta in [1.0, 0.5, 0.1]: - loss = QuantileHuber(quantile=tau, delta=delta) - losses = [loss.value(np.array([r]), np.array([[1]]), np.array([0])) - for r in residuals] - plt.plot(residuals, losses, label=f'δ={delta}') -plt.plot(residuals, [tau * max(r, 0) + (1 - tau) * max(-r, 0) - for r in residuals], 'k--', label='Pinball') -plt.axvline(x=0, color='k', linestyle='--', alpha=0.3) -plt.xlabel('Residual (y - y_pred)') -plt.ylabel('Loss') -plt.title('Quantile Huber Loss (τ=0.75)') -plt.legend() -plt.grid(True, alpha=0.3) + print(f"{'Method':<12} {'Q':<4} {'Coverage':<8} {'Time':<6} " + f"{'Pinball':<8}") + print("-" * 55) + print(f"{'Sklearn':<12} {tau:<4} {sk_cov:<8.3f} {sk_time:<6.3f} " + f"{sk_pinball:<8.4f}") + print(f"{'QuantileHuber':<12} {tau:<4} {qh_cov:<8.3f} {qh_time:<6.3f} " + f"{qh_pinball:<8.4f}") -plt.subplot(122) -plt.hist(y - y_pred_skglm, bins=50, alpha=0.5, label='skglm') -plt.hist(y - y_pred_sklearn, bins=50, alpha=0.5, label='sklearn') -plt.axvline(0, color='k', linestyle='--') -plt.xlabel('Residual (y - y_pred)') -plt.ylabel('Count') -plt.title('Residuals Histogram') -plt.legend() -plt.grid(True, alpha=0.3) -plt.tight_layout() -plt.show() + plot_quantile_huber() diff --git a/skglm/experimental/__init__.py b/skglm/experimental/__init__.py index 75ecb3dc7..922b260aa 100644 --- 
a/skglm/experimental/__init__.py +++ b/skglm/experimental/__init__.py @@ -2,8 +2,7 @@ from .sqrt_lasso import SqrtLasso, SqrtQuadratic from .pdcd_ws import PDCD_WS from .quantile_regression import Pinball -from .quantile_huber import QuantileHuber -from .smooth_quantile_regressor import SmoothQuantileRegressor +from .quantile_huber import QuantileHuber, SimpleQuantileRegressor __all__ = [ IterativeReweightedL1, @@ -12,5 +11,5 @@ SqrtQuadratic, SqrtLasso, QuantileHuber, - SmoothQuantileRegressor, + SimpleQuantileRegressor, ] diff --git a/skglm/experimental/quantile_huber.py b/skglm/experimental/quantile_huber.py index 9435d1d50..40f4bfdf4 100644 --- a/skglm/experimental/quantile_huber.py +++ b/skglm/experimental/quantile_huber.py @@ -1,13 +1,16 @@ -import numpy as np from numba import float64 from skglm.datafits.single_task import Huber -from skglm.utils.sparse_ops import spectral_norm +from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.utils.validation import check_X_y, check_array +from skglm.solvers import FISTA +from skglm.penalties import L1 +from skglm.estimators import GeneralizedLinearEstimator class QuantileHuber(Huber): r"""Quantile Huber loss for quantile regression. - Implements the smoothed pinball loss with quadratic region: + Implements the smoothed pinball loss: .. math:: @@ -25,17 +28,13 @@ class QuantileHuber(Huber): Desired quantile level between 0 and 1. delta : float, default=1.0 Width of quadratic region. - - References - ---------- - Chen, C. (2007). A Finite Smoothing Algorithm for Quantile Regression. - Journal of Computational and Graphical Statistics, 16(1), 136–164. - http://www.jstor.org/stable/27594233 """ def __init__(self, quantile=0.5, delta=1.0): if not 0 < quantile < 1: raise ValueError("quantile must be between 0 and 1") + if delta <= 0: + raise ValueError("delta must be positive") self.delta = float(delta) self.quantile = float(quantile) @@ -45,80 +44,93 @@ def get_spec(self): def params_to_dict(self): return dict(delta=self.delta, quantile=self.quantile) - def _loss_and_grad_scalar(self, residual): - """Calculate loss and gradient for a single residual.""" + def value(self, y, w, Xw): + """Compute the quantile Huber loss value.""" + n_samples = len(y) + res = 0.0 + for i in range(n_samples): + residual = y[i] - Xw[i] + res += self._loss_scalar(residual) + return res / n_samples + + def _loss_scalar(self, residual): + """Calculate loss for a single residual.""" tau = self.quantile delta = self.delta - abs_r = abs(residual) - - # Quadratic core: |r| ≤ delta - if abs_r <= delta: - if residual >= 0: - # 0 ≤ r ≤ delta - loss = tau * residual**2 / (2 * delta) - grad = tau * residual / delta - else: - # -delta ≤ r < 0 - loss = (1 - tau) * residual**2 / (2 * delta) - grad = (1 - tau) * residual / delta - return loss, grad - - # Linear tails: |r| > delta - if residual > delta: - loss = tau * (residual - delta / 2) - grad = tau - return loss, grad + r = residual + + if r >= delta: + # Upper linear tail: r >= delta + return tau * (r - delta/2) + elif r >= 0: + # Upper quadratic: 0 <= r < delta + return tau * r**2 / (2 * delta) + elif r > -delta: + # Lower quadratic: -delta < r < 0 + return (1 - tau) * r**2 / (2 * delta) else: - loss = (1 - tau) * (-residual - delta / 2) - grad = tau - 1 - return loss, grad + # Lower linear tail: r <= -delta + return (1 - tau) * (-r - delta/2) - def value(self, y, w, Xw): - """Compute the quantile Huber loss value.""" - residuals = y - Xw - loss = np.zeros_like(residuals) - for i, r in enumerate(residuals): - 
loss[i], _ = self._loss_and_grad_scalar(r) - return np.mean(loss) - - def raw_grad(self, y, Xw): - """Compute gradient of datafit w.r.t Xw.""" - residuals = y - Xw - grad = np.zeros_like(residuals) - for i, r in enumerate(residuals): - _, grad[i] = self._loss_and_grad_scalar(r) - return -grad - - def get_lipschitz(self, X, y): - """Compute coordinate-wise Lipschitz constants.""" - weight = max(self.quantile, 1 - self.quantile) - return weight * (X ** 2).sum(axis=0) / (len(y) * self.delta) - - def get_global_lipschitz(self, X, y): - """Compute global Lipschitz constant.""" - weight = max(self.quantile, 1 - self.quantile) - return weight * np.linalg.norm(X, 2) ** 2 / (len(y) * self.delta) - - def get_lipschitz_sparse(self, X_data, X_indptr, X_indices, y): - """Compute coordinate-wise Lipschitz constants for sparse X.""" - n_samples = len(y) - weight = max(self.quantile, 1 - self.quantile) - n_features = len(X_indptr) - 1 - lipschitz = np.zeros(n_features, dtype=X_data.dtype) - for j in range(n_features): - nrm2 = 0.0 - for idx in range(X_indptr[j], X_indptr[j + 1]): - nrm2 += X_data[idx] ** 2 - lipschitz[j] = weight * nrm2 / (n_samples * self.delta) - return lipschitz - - def get_global_lipschitz_sparse(self, X_data, X_indptr, X_indices, y): - """Compute global Lipschitz constant for sparse X.""" + def gradient_scalar(self, X, y, w, Xw, j): + """Compute gradient w.r.t. w_j - following parent class pattern.""" n_samples = len(y) - weight = max(self.quantile, 1 - self.quantile) - return weight * spectral_norm( - X_data, X_indptr, X_indices, n_samples - ) ** 2 / (n_samples * self.delta) + grad_j = 0.0 + for i in range(n_samples): + residual = y[i] - Xw[i] + grad_j += -X[i, j] * self._grad_scalar(residual) + return grad_j / n_samples + + def _grad_scalar(self, residual): + """Calculate gradient for a single residual.""" + tau = self.quantile + delta = self.delta + r = residual + + if r >= delta: + # Upper linear tail: r >= delta + return tau + elif r >= 0: + # Upper quadratic: 0 <= r < delta + return tau * r / delta + elif r > -delta: + # Lower quadratic: -delta < r < 0 + return (1 - tau) * r / delta + else: + # Lower linear tail: r <= -delta + return tau - 1 + + +class SimpleQuantileRegressor(BaseEstimator, RegressorMixin): + """Simple quantile regression without progressive smoothing.""" + + def __init__(self, quantile=0.5, alpha=0.1, delta=0.1, max_iter=1000, tol=1e-4): + self.quantile = quantile + self.alpha = alpha + self.delta = delta + self.max_iter = max_iter + self.tol = tol + + def fit(self, X, y): + """Fit using FISTA with fixed delta.""" + X, y = check_X_y(X, y) + + datafit = QuantileHuber(quantile=self.quantile, delta=self.delta) + penalty = L1(alpha=self.alpha) + solver = FISTA(max_iter=self.max_iter, tol=self.tol) + + est = GeneralizedLinearEstimator( + datafit=datafit, + penalty=penalty, + solver=solver + ) + + est.fit(X, y) + self.coef_ = est.coef_ + + return self - def intercept_update_step(self, y, Xw): - return -np.mean(self.raw_grad(y, Xw)) + def predict(self, X): + """Predict using the fitted model.""" + X = check_array(X) + return X @ self.coef_ diff --git a/skglm/experimental/smooth_quantile_regressor.py b/skglm/experimental/smooth_quantile_regressor.py deleted file mode 100644 index 73c9e8147..000000000 --- a/skglm/experimental/smooth_quantile_regressor.py +++ /dev/null @@ -1,75 +0,0 @@ -import numpy as np -from sklearn.base import BaseEstimator, RegressorMixin -from sklearn.utils.validation import check_X_y, check_array -from ..solvers import FISTA -from ..penalties 
import L1 -from ..estimators import GeneralizedLinearEstimator -from .quantile_huber import QuantileHuber - - -class SmoothQuantileRegressor(BaseEstimator, RegressorMixin): - """Quantile regression with progressive smoothing using Huberized loss.""" - - def __init__(self, quantile=0.75, alpha=1e-8, max_iter=1000, tol=1e-6, - delta_init=1.0, delta_final=1e-4, n_deltas=10, fit_intercept=True): - self.quantile = quantile - self.alpha = alpha - self.max_iter = max_iter - self.tol = tol - self.delta_init = delta_init - self.delta_final = delta_final - self.n_deltas = n_deltas - self.fit_intercept = fit_intercept - self.intercept_ = 0.0 - - def fit(self, X, y): - """Fit using FISTA with decreasing smoothing parameter delta. - - For each delta level: - - Update coefficients using FISTA - - Update intercept using gradient step - """ - X, y = check_X_y(X, y) - w = np.zeros(X.shape[1]) - intercept = np.quantile(y, self.quantile) if self.fit_intercept else 0.0 - - for delta in np.geomspace(self.delta_init, self.delta_final, self.n_deltas): - datafit = QuantileHuber(quantile=self.quantile, delta=delta) - est = GeneralizedLinearEstimator( - datafit=datafit, - penalty=L1(alpha=self.alpha), - solver=FISTA(max_iter=self.max_iter, tol=self.tol) - ) - est.coef_ = w - est.fit(X, y) - w = est.coef_ - - if self.fit_intercept: - pred = X @ w + intercept - lipschitz = datafit.get_global_lipschitz(X, y) - grad = np.mean(datafit.raw_grad(y, pred)) - intercept -= grad / lipschitz - - # Debug prints - residuals = y - X.dot(w) - intercept - obj_value = datafit.value(residuals, None, residuals) + \ - self.alpha * np.sum(np.abs(w)) - print(f"Delta: {delta:.6f}, Objective: {obj_value:.4f}, " - f"Intercept: {intercept:.4f}, " - f"Non-zero coefs: {np.sum(np.abs(w) > 1e-6)}, " - f"Lipschitz: {lipschitz:.4f}") - print(f"Residual stats - mean: {np.mean(residuals):.4f}, " - f"std: {np.std(residuals):.4f}, " - f"min: {np.min(residuals):.4f}, " - f"max: {np.max(residuals):.4f}") - - coverage = np.mean(y <= X.dot(w) + intercept) - print(f"Coverage: {coverage:.4f} (target: {self.quantile:.4f})") - - self.coef_, self.intercept_ = w, intercept - return self - - def predict(self, X): - """Predict using the fitted model.""" - check_array(X) - return X @ self.coef_ + self.intercept_ From e4888d92feb476fdce12973fee2cbd385bb3989d Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Mon, 26 May 2025 17:54:28 +0200 Subject: [PATCH 03/16] add progressive smoothing --- examples/plot_smooth_quantile.py | 17 ++++++--- skglm/experimental/__init__.py | 4 +- skglm/experimental/quantile_huber.py | 57 ++++++++++++++++++++-------- 3 files changed, 56 insertions(+), 22 deletions(-) diff --git a/examples/plot_smooth_quantile.py b/examples/plot_smooth_quantile.py index 7ad0b0db2..6b9ac940d 100644 --- a/examples/plot_smooth_quantile.py +++ b/examples/plot_smooth_quantile.py @@ -4,11 +4,11 @@ import numpy as np import time from sklearn.linear_model import QuantileRegressor -from skglm.experimental.quantile_huber import QuantileHuber, SimpleQuantileRegressor +from skglm.experimental.quantile_huber import QuantileHuber, SmoothQuantileRegressor import matplotlib.pyplot as plt from sklearn.datasets import make_regression -# TODO: no smoothing and no intercept handling yet +# TODO: no intercept handling yet def pinball_loss(residuals, quantile): @@ -25,7 +25,7 @@ def plot_quantile_huber(): quantiles = [0.1, 0.5, 0.9] delta = 0.5 residuals = np.linspace(-3, 3, 500) - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) + _, (ax1, ax2) = plt.subplots(1, 2, 
figsize=(10, 4)) for tau in quantiles: qh = QuantileHuber(quantile=tau, delta=delta) loss = [qh._loss_scalar(r) for r in residuals] @@ -49,7 +49,7 @@ def plot_quantile_huber(): tau = 0.8 start = time.time() - sk = QuantileRegressor(quantile=tau, alpha=0.001, fit_intercept=False) + sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=False) sk.fit(X, y) sk_time = time.time() - start sk_pred = sk.predict(X) @@ -57,7 +57,14 @@ def plot_quantile_huber(): sk_pinball = pinball_loss(y - sk_pred, tau) start = time.time() - qh = SimpleQuantileRegressor(quantile=tau, alpha=0.001, delta=0.05) + qh = SmoothQuantileRegressor( + quantile=tau, + alpha=0.1, + delta_init=0.5, + delta_final=0.05, + n_deltas=5, + verbose=True + ) qh.fit(X, y) qh_time = time.time() - start qh_pred = qh.predict(X) diff --git a/skglm/experimental/__init__.py b/skglm/experimental/__init__.py index 922b260aa..caa1aa819 100644 --- a/skglm/experimental/__init__.py +++ b/skglm/experimental/__init__.py @@ -2,7 +2,7 @@ from .sqrt_lasso import SqrtLasso, SqrtQuadratic from .pdcd_ws import PDCD_WS from .quantile_regression import Pinball -from .quantile_huber import QuantileHuber, SimpleQuantileRegressor +from .quantile_huber import QuantileHuber, SmoothQuantileRegressor __all__ = [ IterativeReweightedL1, @@ -11,5 +11,5 @@ SqrtQuadratic, SqrtLasso, QuantileHuber, - SimpleQuantileRegressor, + SmoothQuantileRegressor, ] diff --git a/skglm/experimental/quantile_huber.py b/skglm/experimental/quantile_huber.py index 40f4bfdf4..344adb2b3 100644 --- a/skglm/experimental/quantile_huber.py +++ b/skglm/experimental/quantile_huber.py @@ -1,3 +1,4 @@ +import numpy as np from numba import float64 from skglm.datafits.single_task import Huber from sklearn.base import BaseEstimator, RegressorMixin @@ -101,32 +102,58 @@ def _grad_scalar(self, residual): return tau - 1 -class SimpleQuantileRegressor(BaseEstimator, RegressorMixin): - """Simple quantile regression without progressive smoothing.""" +class SmoothQuantileRegressor(BaseEstimator, RegressorMixin): + """Quantile regression with progressive smoothing.""" - def __init__(self, quantile=0.5, alpha=0.1, delta=0.1, max_iter=1000, tol=1e-4): + def __init__(self, quantile=0.5, alpha=0.1, delta_init=1.0, delta_final=1e-3, + n_deltas=10, max_iter=1000, tol=1e-4, verbose=False): self.quantile = quantile self.alpha = alpha - self.delta = delta + self.delta_init = delta_init + self.delta_final = delta_final + self.n_deltas = n_deltas self.max_iter = max_iter self.tol = tol + self.verbose = verbose def fit(self, X, y): - """Fit using FISTA with fixed delta.""" + """Fit using progressive smoothing: delta_init --> delta_final.""" X, y = check_X_y(X, y) + w = np.zeros(X.shape[1]) + deltas = np.geomspace(self.delta_init, self.delta_final, self.n_deltas) - datafit = QuantileHuber(quantile=self.quantile, delta=self.delta) - penalty = L1(alpha=self.alpha) - solver = FISTA(max_iter=self.max_iter, tol=self.tol) + if self.verbose: + print( + f"Progressive smoothing: delta {self.delta_init:.3f} --> " + f"{self.delta_final:.3f} in {self.n_deltas} steps") - est = GeneralizedLinearEstimator( - datafit=datafit, - penalty=penalty, - solver=solver - ) + for i, delta in enumerate(deltas): + datafit = QuantileHuber(quantile=self.quantile, delta=delta) + penalty = L1(alpha=self.alpha) + solver = FISTA(max_iter=self.max_iter, tol=self.tol) - est.fit(X, y) - self.coef_ = est.coef_ + est = GeneralizedLinearEstimator( + datafit=datafit, + penalty=penalty, + solver=solver + ) + + if i > 0: + est.coef_ = w.copy() + + 
est.fit(X, y) + w = est.coef_.copy() + + if self.verbose: + residuals = y - X @ w + coverage = np.mean(residuals <= 0) + pinball_loss = np.mean(residuals * (self.quantile - (residuals < 0))) + + print( + f" Stage {i+1:2d}: delta={delta:.4f}, " + f"coverage={coverage:.3f}, pinball_loss={pinball_loss:.6f}") + + self.coef_ = w return self From be991c135d3036cae8474ae154c9472ba63586cb Mon Sep 17 00:00:00 2001 From: mathurinm Date: Wed, 28 May 2025 13:29:14 +0200 Subject: [PATCH 04/16] din't inherit from Huber --- examples/plot_smooth_quantile.py | 63 ++++++++++++++-------------- skglm/experimental/quantile_huber.py | 2 +- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/examples/plot_smooth_quantile.py b/examples/plot_smooth_quantile.py index 6b9ac940d..2dc74c75d 100644 --- a/examples/plot_smooth_quantile.py +++ b/examples/plot_smooth_quantile.py @@ -44,39 +44,38 @@ def plot_quantile_huber(): plt.show() -if __name__ == "__main__": - X, y = create_data() - tau = 0.8 +X, y = create_data() +tau = 0.8 - start = time.time() - sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=False) - sk.fit(X, y) - sk_time = time.time() - start - sk_pred = sk.predict(X) - sk_cov = np.mean(y <= sk_pred) - sk_pinball = pinball_loss(y - sk_pred, tau) +start = time.time() +sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=False) +sk.fit(X, y) +sk_time = time.time() - start +sk_pred = sk.predict(X) +sk_cov = np.mean(y <= sk_pred) +sk_pinball = pinball_loss(y - sk_pred, tau) - start = time.time() - qh = SmoothQuantileRegressor( - quantile=tau, - alpha=0.1, - delta_init=0.5, - delta_final=0.05, - n_deltas=5, - verbose=True - ) - qh.fit(X, y) - qh_time = time.time() - start - qh_pred = qh.predict(X) - qh_cov = np.mean(y <= qh_pred) - qh_pinball = pinball_loss(y - qh_pred, tau) +start = time.time() +qh = SmoothQuantileRegressor( + quantile=tau, + alpha=0.1, + delta_init=0.5, + delta_final=0.05, + n_deltas=5, + verbose=True +) +qh.fit(X, y) +qh_time = time.time() - start +qh_pred = qh.predict(X) +qh_cov = np.mean(y <= qh_pred) +qh_pinball = pinball_loss(y - qh_pred, tau) - print(f"{'Method':<12} {'Q':<4} {'Coverage':<8} {'Time':<6} " - f"{'Pinball':<8}") - print("-" * 55) - print(f"{'Sklearn':<12} {tau:<4} {sk_cov:<8.3f} {sk_time:<6.3f} " - f"{sk_pinball:<8.4f}") - print(f"{'QuantileHuber':<12} {tau:<4} {qh_cov:<8.3f} {qh_time:<6.3f} " - f"{qh_pinball:<8.4f}") +print(f"{'Method':<12} {'Q':<4} {'Coverage':<8} {'Time':<6} " + f"{'Pinball':<8}") +print("-" * 55) +print(f"{'Sklearn':<12} {tau:<4} {sk_cov:<8.3f} {sk_time:<6.3f} " + f"{sk_pinball:<8.4f}") +print(f"{'QuantileHuber':<12} {tau:<4} {qh_cov:<8.3f} {qh_time:<6.3f} " + f"{qh_pinball:<8.4f}") - plot_quantile_huber() +plot_quantile_huber() diff --git a/skglm/experimental/quantile_huber.py b/skglm/experimental/quantile_huber.py index 344adb2b3..24e60f270 100644 --- a/skglm/experimental/quantile_huber.py +++ b/skglm/experimental/quantile_huber.py @@ -8,7 +8,7 @@ from skglm.estimators import GeneralizedLinearEstimator -class QuantileHuber(Huber): +class QuantileHuber(): r"""Quantile Huber loss for quantile regression. 
Implements the smoothed pinball loss: From 575ffbb00fa858a2c8fca5aa148574e6d2565b68 Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Fri, 30 May 2025 12:40:17 +0200 Subject: [PATCH 05/16] implemented lipschitz for dense case, support for AndersonCD and adressed feedback comments --- examples/plot_smooth_quantile.py | 66 +++++++++++------------ skglm/experimental/quantile_huber.py | 78 ++++++++++++++++++---------- 2 files changed, 82 insertions(+), 62 deletions(-) diff --git a/examples/plot_smooth_quantile.py b/examples/plot_smooth_quantile.py index 6b9ac940d..cda18e1b3 100644 --- a/examples/plot_smooth_quantile.py +++ b/examples/plot_smooth_quantile.py @@ -16,43 +16,18 @@ def pinball_loss(residuals, quantile): return np.mean(residuals * (quantile - (residuals < 0))) -def create_data(n_samples=1000, n_features=10, noise=0.1): - X, y = make_regression(n_samples=n_samples, n_features=n_features, noise=noise) - return X, y - - -def plot_quantile_huber(): - quantiles = [0.1, 0.5, 0.9] - delta = 0.5 - residuals = np.linspace(-3, 3, 500) - _, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) - for tau in quantiles: - qh = QuantileHuber(quantile=tau, delta=delta) - loss = [qh._loss_scalar(r) for r in residuals] - grad = [qh._grad_scalar(r) for r in residuals] - ax1.plot(residuals, loss, label=f"τ={tau}") - ax2.plot(residuals, grad, label=f"τ={tau}") - ax1.set_title("QuantileHuber Loss") - ax1.set_xlabel("Residual") - ax1.set_ylabel("Loss") - ax1.legend() - ax2.set_title("QuantileHuber Gradient") - ax2.set_xlabel("Residual") - ax2.set_ylabel("Gradient") - ax2.legend() - plt.tight_layout() - plt.show() - - if __name__ == "__main__": - X, y = create_data() + X, y = make_regression(n_samples=10000, n_features=1000, noise=0.1, random_state=0) tau = 0.8 + X_c = X - X.mean(axis=0) + q_tau = np.quantile(y, tau) + y_c = y - q_tau start = time.time() sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=False) - sk.fit(X, y) + sk.fit(X_c, y_c) + sk_pred = sk.predict(X_c) + q_tau sk_time = time.time() - start - sk_pred = sk.predict(X) sk_cov = np.mean(y <= sk_pred) sk_pinball = pinball_loss(y - sk_pred, tau) @@ -61,13 +36,14 @@ def plot_quantile_huber(): quantile=tau, alpha=0.1, delta_init=0.5, - delta_final=0.05, + delta_final=0.01, n_deltas=5, + solver="AndersonCD", verbose=True ) - qh.fit(X, y) + qh.fit(X_c, y_c) qh_time = time.time() - start - qh_pred = qh.predict(X) + qh_pred = qh.predict(X_c) + q_tau qh_cov = np.mean(y <= qh_pred) qh_pinball = pinball_loss(y - qh_pred, tau) @@ -79,4 +55,24 @@ def plot_quantile_huber(): print(f"{'QuantileHuber':<12} {tau:<4} {qh_cov:<8.3f} {qh_time:<6.3f} " f"{qh_pinball:<8.4f}") - plot_quantile_huber() + +quantiles = [0.1, 0.5, 0.9] +delta = 0.5 +residuals = np.linspace(-3, 3, 500) +_, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) +for tau in quantiles: + qh = QuantileHuber(quantile=tau, delta=delta) + loss = [qh._loss_sample(r) for r in residuals] + grad = [qh._grad_per_sample(r) for r in residuals] + ax1.plot(residuals, loss, label=f"τ={tau}") + ax2.plot(residuals, grad, label=f"τ={tau}") +ax1.set_title("QuantileHuber Loss") +ax1.set_xlabel("Residual") +ax1.set_ylabel("Loss") +ax1.legend() +ax2.set_title("QuantileHuber Gradient") +ax2.set_xlabel("Residual") +ax2.set_ylabel("Gradient") +ax2.legend() +plt.tight_layout() +plt.show() diff --git a/skglm/experimental/quantile_huber.py b/skglm/experimental/quantile_huber.py index 344adb2b3..cf8126d38 100644 --- a/skglm/experimental/quantile_huber.py +++ b/skglm/experimental/quantile_huber.py @@ -1,14 +1,14 
@@ import numpy as np +from numpy.linalg import norm from numba import float64 -from skglm.datafits.single_task import Huber +from skglm.datafits.base import BaseDatafit from sklearn.base import BaseEstimator, RegressorMixin -from sklearn.utils.validation import check_X_y, check_array -from skglm.solvers import FISTA +from skglm.solvers import FISTA, AndersonCD from skglm.penalties import L1 from skglm.estimators import GeneralizedLinearEstimator -class QuantileHuber(Huber): +class QuantileHuber(BaseDatafit): r"""Quantile Huber loss for quantile regression. Implements the smoothed pinball loss: @@ -51,11 +51,11 @@ def value(self, y, w, Xw): res = 0.0 for i in range(n_samples): residual = y[i] - Xw[i] - res += self._loss_scalar(residual) + res += self._loss_sample(residual) return res / n_samples - def _loss_scalar(self, residual): - """Calculate loss for a single residual.""" + def _loss_sample(self, residual): + """Calculate loss for a single sample.""" tau = self.quantile delta = self.delta r = residual @@ -79,11 +79,11 @@ def gradient_scalar(self, X, y, w, Xw, j): grad_j = 0.0 for i in range(n_samples): residual = y[i] - Xw[i] - grad_j += -X[i, j] * self._grad_scalar(residual) + grad_j += -X[i, j] * self._grad_per_sample(residual) return grad_j / n_samples - def _grad_scalar(self, residual): - """Calculate gradient for a single residual.""" + def _grad_per_sample(self, residual): + """Calculate gradient for a single sample.""" tau = self.quantile delta = self.delta r = residual @@ -101,12 +101,26 @@ def _grad_scalar(self, residual): # Lower linear tail: r <= -delta return tau - 1 + def get_lipschitz(self, X, y): + n_features = X.shape[1] + + lipschitz = np.zeros(n_features, dtype=X.dtype) + c = max(self.quantile, 1 - self.quantile) / self.delta + for j in range(n_features): + lipschitz[j] = c * (X[:, j] ** 2).sum() / len(y) + + return lipschitz + + def get_global_lipschitz(self, X, y): + c = max(self.quantile, 1 - self.quantile) / self.delta + return c * norm(X, ord=2) ** 2 / len(y) + class SmoothQuantileRegressor(BaseEstimator, RegressorMixin): """Quantile regression with progressive smoothing.""" def __init__(self, quantile=0.5, alpha=0.1, delta_init=1.0, delta_final=1e-3, - n_deltas=10, max_iter=1000, tol=1e-4, verbose=False): + n_deltas=10, max_iter=1000, tol=1e-4, verbose=False, solver="FISTA"): self.quantile = quantile self.alpha = alpha self.delta_init = delta_init @@ -115,10 +129,10 @@ def __init__(self, quantile=0.5, alpha=0.1, delta_init=1.0, delta_final=1e-3, self.max_iter = max_iter self.tol = tol self.verbose = verbose + self.solver = solver def fit(self, X, y): """Fit using progressive smoothing: delta_init --> delta_final.""" - X, y = check_X_y(X, y) w = np.zeros(X.shape[1]) deltas = np.geomspace(self.delta_init, self.delta_final, self.n_deltas) @@ -127,19 +141,26 @@ def fit(self, X, y): f"Progressive smoothing: delta {self.delta_init:.3f} --> " f"{self.delta_final:.3f} in {self.n_deltas} steps") - for i, delta in enumerate(deltas): - datafit = QuantileHuber(quantile=self.quantile, delta=delta) - penalty = L1(alpha=self.alpha) - solver = FISTA(max_iter=self.max_iter, tol=self.tol) + datafit = QuantileHuber(quantile=self.quantile, delta=self.delta_init) + penalty = L1(alpha=self.alpha) + # Solver selection + if isinstance(self.solver, str): + if self.solver == "FISTA": + solver = FISTA(max_iter=self.max_iter, tol=self.tol) + solver.warm_start = True + elif self.solver == "AndersonCD": + solver = AndersonCD(max_iter=self.max_iter, tol=self.tol, + warm_start=True, 
fit_intercept=False) + else: + raise ValueError(f"Unknown solver: {self.solver}") + else: + solver = self.solver - est = GeneralizedLinearEstimator( - datafit=datafit, - penalty=penalty, - solver=solver - ) + est = GeneralizedLinearEstimator( + datafit=datafit, penalty=penalty, solver=solver) - if i > 0: - est.coef_ = w.copy() + for i, delta in enumerate(deltas): + datafit.delta = float(delta) est.fit(X, y) w = est.coef_.copy() @@ -151,13 +172,16 @@ def fit(self, X, y): print( f" Stage {i+1:2d}: delta={delta:.4f}, " - f"coverage={coverage:.3f}, pinball_loss={pinball_loss:.6f}") + f"coverage={coverage:.3f}, pinball_loss={pinball_loss:.6f}, " + f"n_iter={est.n_iter_}" + ) - self.coef_ = w + self.est = est return self def predict(self, X): """Predict using the fitted model.""" - X = check_array(X) - return X @ self.coef_ + if not hasattr(self, "est"): + raise ValueError("Call 'fit' before 'predict'.") + return self.est.predict(X) From 39ced0d78efc1c2f5e9b85b0746ad9f56fdf83a2 Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Fri, 30 May 2025 14:32:08 +0200 Subject: [PATCH 06/16] add intercept method (only works for AndersonCD so far) --- examples/plot_smooth_quantile.py | 20 ++++++++------------ skglm/experimental/quantile_huber.py | 23 +++++++++++++++++++++-- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/examples/plot_smooth_quantile.py b/examples/plot_smooth_quantile.py index 0700f6856..eca638a3c 100644 --- a/examples/plot_smooth_quantile.py +++ b/examples/plot_smooth_quantile.py @@ -8,24 +8,19 @@ import matplotlib.pyplot as plt from sklearn.datasets import make_regression -# TODO: no intercept handling yet - def pinball_loss(residuals, quantile): """True pinball loss.""" return np.mean(residuals * (quantile - (residuals < 0))) -X, y = make_regression(n_samples=10000, n_features=1000, noise=0.1, random_state=0) +X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=0) tau = 0.8 -X_c = X - X.mean(axis=0) -q_tau = np.quantile(y, tau) -y_c = y - q_tau start = time.time() -sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=False) -sk.fit(X_c, y_c) -sk_pred = sk.predict(X_c) + q_tau +sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=True) +sk.fit(X, y) +sk_pred = sk.predict(X) sk_time = time.time() - start sk_cov = np.mean(y <= sk_pred) sk_pinball = pinball_loss(y - sk_pred, tau) @@ -38,11 +33,12 @@ def pinball_loss(residuals, quantile): delta_final=0.01, n_deltas=5, solver="AndersonCD", - verbose=True + verbose=True, + fit_intercept=True ) -qh.fit(X_c, y_c) +qh.fit(X, y) qh_time = time.time() - start -qh_pred = qh.predict(X_c) + q_tau +qh_pred = qh.predict(X) qh_cov = np.mean(y <= qh_pred) qh_pinball = pinball_loss(y - qh_pred, tau) diff --git a/skglm/experimental/quantile_huber.py b/skglm/experimental/quantile_huber.py index cf8126d38..bc168f2ed 100644 --- a/skglm/experimental/quantile_huber.py +++ b/skglm/experimental/quantile_huber.py @@ -115,12 +115,21 @@ def get_global_lipschitz(self, X, y): c = max(self.quantile, 1 - self.quantile) / self.delta return c * norm(X, ord=2) ** 2 / len(y) + def intercept_update_step(self, y, Xw): + n_samples = len(y) + update = 0.0 + for i in range(n_samples): + residual = y[i] - Xw[i] + update -= self._grad_per_sample(residual) + return update / n_samples + class SmoothQuantileRegressor(BaseEstimator, RegressorMixin): """Quantile regression with progressive smoothing.""" def __init__(self, quantile=0.5, alpha=0.1, delta_init=1.0, delta_final=1e-3, - n_deltas=10, max_iter=1000, tol=1e-4, 
verbose=False, solver="FISTA"): + n_deltas=10, max_iter=1000, tol=1e-4, verbose=False, + solver="AndersonCD", fit_intercept=True): self.quantile = quantile self.alpha = alpha self.delta_init = delta_init @@ -130,6 +139,7 @@ def __init__(self, quantile=0.5, alpha=0.1, delta_init=1.0, delta_final=1e-3, self.tol = tol self.verbose = verbose self.solver = solver + self.fit_intercept = fit_intercept def fit(self, X, y): """Fit using progressive smoothing: delta_init --> delta_final.""" @@ -146,11 +156,18 @@ def fit(self, X, y): # Solver selection if isinstance(self.solver, str): if self.solver == "FISTA": + if self.fit_intercept: + import warnings + warnings.warn( + "FISTA solver does not support intercept. " + "Falling back to fit_intercept=False." + ) + self.fit_intercept = False solver = FISTA(max_iter=self.max_iter, tol=self.tol) solver.warm_start = True elif self.solver == "AndersonCD": solver = AndersonCD(max_iter=self.max_iter, tol=self.tol, - warm_start=True, fit_intercept=False) + warm_start=True, fit_intercept=self.fit_intercept) else: raise ValueError(f"Unknown solver: {self.solver}") else: @@ -167,6 +184,8 @@ def fit(self, X, y): if self.verbose: residuals = y - X @ w + if self.fit_intercept: + residuals -= est.intercept_ coverage = np.mean(residuals <= 0) pinball_loss = np.mean(residuals * (self.quantile - (residuals < 0))) From 4375b4c26a6ccefb2478f2a74a72987c19052405 Mon Sep 17 00:00:00 2001 From: mathurinm Date: Fri, 13 Jun 2025 08:26:16 +0200 Subject: [PATCH 07/16] script debug quantile --- debug_quantile.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 debug_quantile.py diff --git a/debug_quantile.py b/debug_quantile.py new file mode 100644 index 000000000..09dddddda --- /dev/null +++ b/debug_quantile.py @@ -0,0 +1,77 @@ +""" +QuantileHuber vs Sklearn +""" +import numpy as np +import time +from sklearn.linear_model import QuantileRegressor +from skglm.experimental.quantile_huber import QuantileHuber, SmoothQuantileRegressor +import matplotlib.pyplot as plt +from sklearn.datasets import make_regression + + +def pinball_loss(residuals, quantile): + """True pinball loss.""" + return np.mean(residuals * (quantile - (residuals < 0))) + + +X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=0) +tau = 0.8 + +start = time.time() +sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=True) +sk.fit(X, y) +sk_pred = sk.predict(X) +sk_time = time.time() - start +sk_cov = np.mean(y <= sk_pred) +sk_pinball = pinball_loss(y - sk_pred, tau) + +start = time.time() +qh = SmoothQuantileRegressor( + quantile=tau, + alpha=0.1, + delta_init=0.5, + delta_final=0.01, + n_deltas=5, + solver="AndersonCD", + verbose=True, + fit_intercept=True +) +qh.fit(X, y) +qh_time = time.time() - start +qh_pred = qh.predict(X) +qh_cov = np.mean(y <= qh_pred) +qh_pinball = pinball_loss(y - qh_pred, tau) + + +print(sk.coef_) +print(qh.est.coef_) + +# print(f"{'Method':<12} {'Q':<4} {'Coverage':<8} {'Time':<6} " +# f"{'Pinball':<8}") +# print("-" * 55) +# print(f"{'Sklearn':<12} {tau:<4} {sk_cov:<8.3f} {sk_time:<6.3f} " +# f"{sk_pinball:<8.4f}") +# print(f"{'QuantileHuber':<12} {tau:<4} {qh_cov:<8.3f} {qh_time:<6.3f} " +# f"{qh_pinball:<8.4f}") + + +# quantiles = [0.1, 0.5, 0.9] +# delta = 0.5 +# residuals = np.linspace(-3, 3, 500) +# _, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) +# for tau in quantiles: +# qh = QuantileHuber(quantile=tau, delta=delta) +# loss = [qh._loss_sample(r) for r in residuals] +# grad = 
[qh._grad_per_sample(r) for r in residuals] +# ax1.plot(residuals, loss, label=f"τ={tau}") +# ax2.plot(residuals, grad, label=f"τ={tau}") +# ax1.set_title("QuantileHuber Loss") +# ax1.set_xlabel("Residual") +# ax1.set_ylabel("Loss") +# ax1.legend() +# ax2.set_title("QuantileHuber Gradient") +# ax2.set_xlabel("Residual") +# ax2.set_ylabel("Gradient") +# ax2.legend() +# plt.tight_layout() +# plt.show() From 3c9c320766c2f2535e1b23c494a502bca51cde64 Mon Sep 17 00:00:00 2001 From: mathurinm Date: Fri, 13 Jun 2025 12:19:46 +0200 Subject: [PATCH 08/16] set inner solver to verbose for debug + pinpoint failing case --- debug_quantile.py | 2 +- skglm/experimental/quantile_huber.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debug_quantile.py b/debug_quantile.py index 09dddddda..e6341eb71 100644 --- a/debug_quantile.py +++ b/debug_quantile.py @@ -30,7 +30,7 @@ def pinball_loss(residuals, quantile): quantile=tau, alpha=0.1, delta_init=0.5, - delta_final=0.01, + delta_final=0.001, n_deltas=5, solver="AndersonCD", verbose=True, diff --git a/skglm/experimental/quantile_huber.py b/skglm/experimental/quantile_huber.py index bc168f2ed..cc179e72f 100644 --- a/skglm/experimental/quantile_huber.py +++ b/skglm/experimental/quantile_huber.py @@ -166,7 +166,7 @@ def fit(self, X, y): solver = FISTA(max_iter=self.max_iter, tol=self.tol) solver.warm_start = True elif self.solver == "AndersonCD": - solver = AndersonCD(max_iter=self.max_iter, tol=self.tol, + solver = AndersonCD(max_iter=self.max_iter, tol=self.tol, verbose=3, warm_start=True, fit_intercept=self.fit_intercept) else: raise ValueError(f"Unknown solver: {self.solver}") From 693ca06173152ede43ebf3ce637aad2219763b8d Mon Sep 17 00:00:00 2001 From: mathurinm Date: Fri, 13 Jun 2025 12:21:31 +0200 Subject: [PATCH 09/16] check fit_intercept --- debug_quantile.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/debug_quantile.py b/debug_quantile.py index e6341eb71..035b21149 100644 --- a/debug_quantile.py +++ b/debug_quantile.py @@ -17,8 +17,9 @@ def pinball_loss(residuals, quantile): X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=0) tau = 0.8 +fit_intercept = False start = time.time() -sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=True) +sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=fit_intercept) sk.fit(X, y) sk_pred = sk.predict(X) sk_time = time.time() - start @@ -30,11 +31,11 @@ def pinball_loss(residuals, quantile): quantile=tau, alpha=0.1, delta_init=0.5, - delta_final=0.001, + delta_final=0.00001, n_deltas=5, solver="AndersonCD", verbose=True, - fit_intercept=True + fit_intercept=fit_intercept, ) qh.fit(X, y) qh_time = time.time() - start From 303b16769c2dcc656ef546044c3081a3eca51d93 Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Fri, 13 Jun 2025 15:15:56 +0200 Subject: [PATCH 10/16] remove FISTA, implement comments, add unit test, add lipschitz on intercept update --- skglm/experimental/quantile_huber.py | 67 +++++++++---------- .../experimental/tests/test_quantile_huber.py | 26 +++++++ 2 files changed, 58 insertions(+), 35 deletions(-) create mode 100644 skglm/experimental/tests/test_quantile_huber.py diff --git a/skglm/experimental/quantile_huber.py b/skglm/experimental/quantile_huber.py index bc168f2ed..fa3483398 100644 --- a/skglm/experimental/quantile_huber.py +++ b/skglm/experimental/quantile_huber.py @@ -3,9 +3,10 @@ from numba import float64 from skglm.datafits.base import BaseDatafit from sklearn.base import BaseEstimator, 
RegressorMixin -from skglm.solvers import FISTA, AndersonCD +from skglm.solvers import AndersonCD from skglm.penalties import L1 from skglm.estimators import GeneralizedLinearEstimator +from sklearn.exceptions import NotFittedError class QuantileHuber(BaseDatafit): @@ -117,19 +118,25 @@ def get_global_lipschitz(self, X, y): def intercept_update_step(self, y, Xw): n_samples = len(y) - update = 0.0 + + # Compute gradient + grad = 0.0 for i in range(n_samples): residual = y[i] - Xw[i] - update -= self._grad_per_sample(residual) - return update / n_samples + grad -= self._grad_per_sample(residual) + grad /= n_samples + + # Apply step size 1/c + c = max(self.quantile, 1 - self.quantile) / self.delta + return grad / c class SmoothQuantileRegressor(BaseEstimator, RegressorMixin): """Quantile regression with progressive smoothing.""" def __init__(self, quantile=0.5, alpha=0.1, delta_init=1.0, delta_final=1e-3, - n_deltas=10, max_iter=1000, tol=1e-4, verbose=False, - solver="AndersonCD", fit_intercept=True): + n_deltas=10, max_iter=1000, tol=1e-6, verbose=False, + fit_intercept=True): self.quantile = quantile self.alpha = alpha self.delta_init = delta_init @@ -138,7 +145,6 @@ def __init__(self, quantile=0.5, alpha=0.1, delta_init=1.0, delta_final=1e-3, self.max_iter = max_iter self.tol = tol self.verbose = verbose - self.solver = solver self.fit_intercept = fit_intercept def fit(self, X, y): @@ -148,30 +154,16 @@ def fit(self, X, y): if self.verbose: print( - f"Progressive smoothing: delta {self.delta_init:.3f} --> " - f"{self.delta_final:.3f} in {self.n_deltas} steps") + f"Progressive smoothing: delta {self.delta_init:.2e} --> " + f"{self.delta_final:.2e} in {self.n_deltas} steps") datafit = QuantileHuber(quantile=self.quantile, delta=self.delta_init) penalty = L1(alpha=self.alpha) - # Solver selection - if isinstance(self.solver, str): - if self.solver == "FISTA": - if self.fit_intercept: - import warnings - warnings.warn( - "FISTA solver does not support intercept. " - "Falling back to fit_intercept=False." - ) - self.fit_intercept = False - solver = FISTA(max_iter=self.max_iter, tol=self.tol) - solver.warm_start = True - elif self.solver == "AndersonCD": - solver = AndersonCD(max_iter=self.max_iter, tol=self.tol, - warm_start=True, fit_intercept=self.fit_intercept) - else: - raise ValueError(f"Unknown solver: {self.solver}") - else: - solver = self.solver + + # Use AndersonCD solver + solver = AndersonCD(max_iter=self.max_iter, tol=self.tol, + warm_start=True, fit_intercept=self.fit_intercept, + verbose=3) est = GeneralizedLinearEstimator( datafit=datafit, penalty=penalty, solver=solver) @@ -186,21 +178,26 @@ def fit(self, X, y): residuals = y - X @ w if self.fit_intercept: residuals -= est.intercept_ - coverage = np.mean(residuals <= 0) pinball_loss = np.mean(residuals * (self.quantile - (residuals < 0))) print( - f" Stage {i+1:2d}: delta={delta:.4f}, " - f"coverage={coverage:.3f}, pinball_loss={pinball_loss:.6f}, " + f" Stage {i+1:2d}: delta={delta:.2e}, " + f"pinball_loss={pinball_loss:.6f}, " f"n_iter={est.n_iter_}" ) - self.est = est + self.est_ = est + self.coef_ = est.coef_ + if self.fit_intercept: + self.intercept_ = est.intercept_ return self def predict(self, X): """Predict using the fitted model.""" - if not hasattr(self, "est"): - raise ValueError("Call 'fit' before 'predict'.") - return self.est.predict(X) + if not hasattr(self, "est_"): + raise NotFittedError( + "This SmoothQuantileRegressor instance is not fitted yet. 
" + "Call 'fit' with appropriate arguments before using this estimator." + ) + return self.est_.predict(X) diff --git a/skglm/experimental/tests/test_quantile_huber.py b/skglm/experimental/tests/test_quantile_huber.py new file mode 100644 index 000000000..ad0e2f1e5 --- /dev/null +++ b/skglm/experimental/tests/test_quantile_huber.py @@ -0,0 +1,26 @@ +import pytest +from numpy.testing import assert_allclose +from sklearn.linear_model import QuantileRegressor +from sklearn.datasets import make_regression +from skglm.experimental.quantile_huber import SmoothQuantileRegressor + + +@pytest.mark.parametrize('quantile', [0.3, 0.5, 0.7]) +def test_quantile_huber_matches_sklearn(quantile): + """Test that SmoothQuantileRegressor with small delta matches sklearn's + QuantileRegressor.""" + X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42) + + sk_est = QuantileRegressor(quantile=quantile, alpha=0.1, solver='highs').fit(X, y) + smooth_est = SmoothQuantileRegressor( + quantile=quantile, + alpha=0.1, + delta_init=0.5, + delta_final=0.00001, + n_deltas=15, + verbose=True, + fit_intercept=True, + ).fit(X, y) + + assert_allclose(smooth_est.coef_, sk_est.coef_, atol=1e-4) + assert_allclose(smooth_est.intercept_, sk_est.intercept_, atol=1e-4) From 2c84a44178ad44de87d562e467dcea7e339bbe15 Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Fri, 13 Jun 2025 15:43:47 +0200 Subject: [PATCH 11/16] remove solver selection from plotting example, fixes CircleCI --- examples/plot_smooth_quantile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/plot_smooth_quantile.py b/examples/plot_smooth_quantile.py index eca638a3c..c2608685c 100644 --- a/examples/plot_smooth_quantile.py +++ b/examples/plot_smooth_quantile.py @@ -32,7 +32,6 @@ def pinball_loss(residuals, quantile): delta_init=0.5, delta_final=0.01, n_deltas=5, - solver="AndersonCD", verbose=True, fit_intercept=True ) From 7d7106c110ac866438557c6a89a929f3af35e664 Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Mon, 16 Jun 2025 14:14:40 +0200 Subject: [PATCH 12/16] add fit_intercept=False to pytest, but failing --- skglm/experimental/tests/test_quantile_huber.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/skglm/experimental/tests/test_quantile_huber.py b/skglm/experimental/tests/test_quantile_huber.py index ad0e2f1e5..a4011593a 100644 --- a/skglm/experimental/tests/test_quantile_huber.py +++ b/skglm/experimental/tests/test_quantile_huber.py @@ -6,7 +6,8 @@ @pytest.mark.parametrize('quantile', [0.3, 0.5, 0.7]) -def test_quantile_huber_matches_sklearn(quantile): +@pytest.mark.parametrize('fit_intercept', [True, False]) +def test_quantile_huber_matches_sklearn(quantile, fit_intercept): """Test that SmoothQuantileRegressor with small delta matches sklearn's QuantileRegressor.""" X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42) @@ -19,8 +20,9 @@ def test_quantile_huber_matches_sklearn(quantile): delta_final=0.00001, n_deltas=15, verbose=True, - fit_intercept=True, + fit_intercept=fit_intercept, ).fit(X, y) assert_allclose(smooth_est.coef_, sk_est.coef_, atol=1e-4) - assert_allclose(smooth_est.intercept_, sk_est.intercept_, atol=1e-4) + if fit_intercept: + assert_allclose(smooth_est.intercept_, sk_est.intercept_, atol=1e-4) From 34b083dbf1d7a3c49bd36dae3584165b221a436f Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Mon, 16 Jun 2025 14:24:33 +0200 Subject: [PATCH 13/16] parametrize intercept works now in unit test, still warnings though --- 
skglm/experimental/tests/test_quantile_huber.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skglm/experimental/tests/test_quantile_huber.py b/skglm/experimental/tests/test_quantile_huber.py index a4011593a..db22dfb81 100644 --- a/skglm/experimental/tests/test_quantile_huber.py +++ b/skglm/experimental/tests/test_quantile_huber.py @@ -12,7 +12,8 @@ def test_quantile_huber_matches_sklearn(quantile, fit_intercept): QuantileRegressor.""" X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42) - sk_est = QuantileRegressor(quantile=quantile, alpha=0.1, solver='highs').fit(X, y) + sk_est = QuantileRegressor(quantile=quantile, alpha=0.1, + solver='highs', fit_intercept=fit_intercept).fit(X, y) smooth_est = SmoothQuantileRegressor( quantile=quantile, alpha=0.1, From ba3d6b8008e1dcb8e37e9d91e72235ec3c222725 Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Mon, 16 Jun 2025 15:15:58 +0200 Subject: [PATCH 14/16] adress remaining comments, loosen tolerance, warnings are still there --- skglm/experimental/quantile_huber.py | 2 +- skglm/experimental/tests/test_quantile_huber.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/skglm/experimental/quantile_huber.py b/skglm/experimental/quantile_huber.py index fa3483398..2923c2642 100644 --- a/skglm/experimental/quantile_huber.py +++ b/skglm/experimental/quantile_huber.py @@ -163,7 +163,7 @@ def fit(self, X, y): # Use AndersonCD solver solver = AndersonCD(max_iter=self.max_iter, tol=self.tol, warm_start=True, fit_intercept=self.fit_intercept, - verbose=3) + verbose=max(0, self.verbose - 1)) est = GeneralizedLinearEstimator( datafit=datafit, penalty=penalty, solver=solver) diff --git a/skglm/experimental/tests/test_quantile_huber.py b/skglm/experimental/tests/test_quantile_huber.py index db22dfb81..b455c5bf1 100644 --- a/skglm/experimental/tests/test_quantile_huber.py +++ b/skglm/experimental/tests/test_quantile_huber.py @@ -1,4 +1,5 @@ import pytest +import numpy as np from numpy.testing import assert_allclose from sklearn.linear_model import QuantileRegressor from sklearn.datasets import make_regression @@ -20,10 +21,15 @@ def test_quantile_huber_matches_sklearn(quantile, fit_intercept): delta_init=0.5, delta_final=0.00001, n_deltas=15, - verbose=True, + verbose=False, fit_intercept=fit_intercept, ).fit(X, y) - assert_allclose(smooth_est.coef_, sk_est.coef_, atol=1e-4) + assert not np.allclose(sk_est.coef_, 0, atol=1e-8), ( + "All coefficients in sk_est are (near) zero: alpha may be too high.") + assert not np.allclose(smooth_est.coef_, 0, atol=1e-8), ( + "All coefficients in smooth_est are (near) zero: alpha may be too high.") + + assert_allclose(smooth_est.coef_, sk_est.coef_, atol=1e-3) if fit_intercept: - assert_allclose(smooth_est.intercept_, sk_est.intercept_, atol=1e-4) + assert_allclose(smooth_est.intercept_, sk_est.intercept_, atol=1e-3) From f43c190c15556ffe562f94b1a7190a0e5b97f0ba Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Mon, 16 Jun 2025 15:50:20 +0200 Subject: [PATCH 15/16] suppress warnings, loosen tolerance, edit whats new --- doc/changes/0.5.rst | 1 + .../experimental/tests/test_quantile_huber.py | 29 ++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/doc/changes/0.5.rst b/doc/changes/0.5.rst index c807b80d1..4502d879c 100644 --- a/doc/changes/0.5.rst +++ b/doc/changes/0.5.rst @@ -3,3 +3,4 @@ Version 0.5 (in progress) ------------------------- - Add support for fitting an intercept in :ref:`SqrtLasso ` (PR: 
:gh:`298`) +- Add experimental QuantileHuber and SmoothQuantileRegressor for quantile regression, and an example script (PR: :gh:`312`). diff --git a/skglm/experimental/tests/test_quantile_huber.py b/skglm/experimental/tests/test_quantile_huber.py index b455c5bf1..c81a54595 100644 --- a/skglm/experimental/tests/test_quantile_huber.py +++ b/skglm/experimental/tests/test_quantile_huber.py @@ -4,6 +4,7 @@ from sklearn.linear_model import QuantileRegressor from sklearn.datasets import make_regression from skglm.experimental.quantile_huber import SmoothQuantileRegressor +import warnings @pytest.mark.parametrize('quantile', [0.3, 0.5, 0.7]) @@ -13,23 +14,25 @@ def test_quantile_huber_matches_sklearn(quantile, fit_intercept): QuantileRegressor.""" X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42) - sk_est = QuantileRegressor(quantile=quantile, alpha=0.1, - solver='highs', fit_intercept=fit_intercept).fit(X, y) - smooth_est = SmoothQuantileRegressor( - quantile=quantile, - alpha=0.1, - delta_init=0.5, - delta_final=0.00001, - n_deltas=15, - verbose=False, - fit_intercept=fit_intercept, - ).fit(X, y) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=RuntimeWarning) + sk_est = QuantileRegressor(quantile=quantile, alpha=0.1, solver='highs', + fit_intercept=fit_intercept).fit(X, y) + smooth_est = SmoothQuantileRegressor( + quantile=quantile, + alpha=0.1, + delta_init=0.5, + delta_final=0.00001, + n_deltas=15, + verbose=False, + fit_intercept=fit_intercept, + ).fit(X, y) assert not np.allclose(sk_est.coef_, 0, atol=1e-8), ( "All coefficients in sk_est are (near) zero: alpha may be too high.") assert not np.allclose(smooth_est.coef_, 0, atol=1e-8), ( "All coefficients in smooth_est are (near) zero: alpha may be too high.") - assert_allclose(smooth_est.coef_, sk_est.coef_, atol=1e-3) + assert_allclose(smooth_est.coef_, sk_est.coef_, atol=2e-3) if fit_intercept: - assert_allclose(smooth_est.intercept_, sk_est.intercept_, atol=1e-3) + assert_allclose(smooth_est.intercept_, sk_est.intercept_, atol=2e-3) From cb0c3e40ad27d4e167126c095734a3f7b0d2c7ba Mon Sep 17 00:00:00 2001 From: floriankozikowski Date: Wed, 18 Jun 2025 15:34:05 +0200 Subject: [PATCH 16/16] adress final comments (api, remove debug, etc.), improve example for webpage --- debug_quantile.py | 78 -------------- doc/api.rst | 2 + doc/changes/0.5.rst | 2 +- examples/plot_smooth_quantile.py | 148 +++++++++++++++++++-------- skglm/experimental/__init__.py | 4 +- skglm/experimental/quantile_huber.py | 8 +- 6 files changed, 114 insertions(+), 128 deletions(-) delete mode 100644 debug_quantile.py diff --git a/debug_quantile.py b/debug_quantile.py deleted file mode 100644 index 035b21149..000000000 --- a/debug_quantile.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -QuantileHuber vs Sklearn -""" -import numpy as np -import time -from sklearn.linear_model import QuantileRegressor -from skglm.experimental.quantile_huber import QuantileHuber, SmoothQuantileRegressor -import matplotlib.pyplot as plt -from sklearn.datasets import make_regression - - -def pinball_loss(residuals, quantile): - """True pinball loss.""" - return np.mean(residuals * (quantile - (residuals < 0))) - - -X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=0) -tau = 0.8 - -fit_intercept = False -start = time.time() -sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=fit_intercept) -sk.fit(X, y) -sk_pred = sk.predict(X) -sk_time = time.time() - start -sk_cov = np.mean(y <= sk_pred) -sk_pinball = 
pinball_loss(y - sk_pred, tau) - -start = time.time() -qh = SmoothQuantileRegressor( - quantile=tau, - alpha=0.1, - delta_init=0.5, - delta_final=0.00001, - n_deltas=5, - solver="AndersonCD", - verbose=True, - fit_intercept=fit_intercept, -) -qh.fit(X, y) -qh_time = time.time() - start -qh_pred = qh.predict(X) -qh_cov = np.mean(y <= qh_pred) -qh_pinball = pinball_loss(y - qh_pred, tau) - - -print(sk.coef_) -print(qh.est.coef_) - -# print(f"{'Method':<12} {'Q':<4} {'Coverage':<8} {'Time':<6} " -# f"{'Pinball':<8}") -# print("-" * 55) -# print(f"{'Sklearn':<12} {tau:<4} {sk_cov:<8.3f} {sk_time:<6.3f} " -# f"{sk_pinball:<8.4f}") -# print(f"{'QuantileHuber':<12} {tau:<4} {qh_cov:<8.3f} {qh_time:<6.3f} " -# f"{qh_pinball:<8.4f}") - - -# quantiles = [0.1, 0.5, 0.9] -# delta = 0.5 -# residuals = np.linspace(-3, 3, 500) -# _, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) -# for tau in quantiles: -# qh = QuantileHuber(quantile=tau, delta=delta) -# loss = [qh._loss_sample(r) for r in residuals] -# grad = [qh._grad_per_sample(r) for r in residuals] -# ax1.plot(residuals, loss, label=f"τ={tau}") -# ax2.plot(residuals, grad, label=f"τ={tau}") -# ax1.set_title("QuantileHuber Loss") -# ax1.set_xlabel("Residual") -# ax1.set_ylabel("Loss") -# ax1.legend() -# ax2.set_title("QuantileHuber Gradient") -# ax2.set_xlabel("Residual") -# ax2.set_ylabel("Gradient") -# ax2.legend() -# plt.tight_layout() -# plt.show() diff --git a/doc/api.rst b/doc/api.rst index b03757301..bfed8dc8a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -105,5 +105,7 @@ Experimental IterativeReweightedL1 PDCD_WS Pinball + QuantileHuber + SmoothQuantileRegressor SqrtQuadratic SqrtLasso diff --git a/doc/changes/0.5.rst b/doc/changes/0.5.rst index 4502d879c..14092d063 100644 --- a/doc/changes/0.5.rst +++ b/doc/changes/0.5.rst @@ -3,4 +3,4 @@ Version 0.5 (in progress) ------------------------- - Add support for fitting an intercept in :ref:`SqrtLasso ` (PR: :gh:`298`) -- Add experimental QuantileHuber and SmoothQuantileRegressor for quantile regression, and an example script (PR: :gh:`312`). +- Add experimental :ref:`QuantileHuber ` and :ref:`SmoothQuantileRegressor ` for quantile regression, and an example script (PR: :gh:`312`). diff --git a/examples/plot_smooth_quantile.py b/examples/plot_smooth_quantile.py index c2608685c..992ed739e 100644 --- a/examples/plot_smooth_quantile.py +++ b/examples/plot_smooth_quantile.py @@ -1,72 +1,132 @@ """ -QuantileHuber vs Sklearn +================================================================================ +Smooth Quantile Regression with QuantileHuber +================================================================================ + +This example compares sklearn's standard quantile regression with skglm's smooth +approximation. Skglm's quantile regression uses a smooth Huber-like approximation +(quadratic near zero, linear in the tails) to replace the non-differentiable +pinball loss. Progressive smoothing enables efficient gradient-based optimization, +maintaining speed and accuracy also on large-scale, high-dimensional datasets. 
""" + +# Author: Florian Kozikowski import numpy as np import time -from sklearn.linear_model import QuantileRegressor -from skglm.experimental.quantile_huber import QuantileHuber, SmoothQuantileRegressor import matplotlib.pyplot as plt -from sklearn.datasets import make_regression - - -def pinball_loss(residuals, quantile): - """True pinball loss.""" - return np.mean(residuals * (quantile - (residuals < 0))) +from sklearn.datasets import make_regression +from sklearn.linear_model import QuantileRegressor +from skglm.experimental.quantile_huber import QuantileHuber, SmoothQuantileRegressor +# Generate regression data X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=0) -tau = 0.8 +tau = 0.8 # 80th percentile + +# %% +# Compare standard vs smooth quantile regression +# ---------------------------------------------- +# Both methods solve the same problem but with different loss functions. +# Standard quantile regression (sklearn) start = time.time() -sk = QuantileRegressor(quantile=tau, alpha=0.1, fit_intercept=True) -sk.fit(X, y) -sk_pred = sk.predict(X) +sk_model = QuantileRegressor(quantile=tau, alpha=0.1) +sk_model.fit(X, y) sk_time = time.time() - start -sk_cov = np.mean(y <= sk_pred) -sk_pinball = pinball_loss(y - sk_pred, tau) +# Smooth quantile regression (skglm) start = time.time() -qh = SmoothQuantileRegressor( +smooth_model = SmoothQuantileRegressor( quantile=tau, alpha=0.1, - delta_init=0.5, - delta_final=0.01, - n_deltas=5, - verbose=True, - fit_intercept=True + delta_init=0.5, # Initial smoothing parameter + delta_final=0.01, # Final smoothing (smaller = closer to true quantile) + n_deltas=5 # Number of continuation steps ) -qh.fit(X, y) -qh_time = time.time() - start -qh_pred = qh.predict(X) -qh_cov = np.mean(y <= qh_pred) -qh_pinball = pinball_loss(y - qh_pred, tau) +smooth_model.fit(X, y) +smooth_time = time.time() - start -print(f"{'Method':<12} {'Q':<4} {'Coverage':<8} {'Time':<6} " - f"{'Pinball':<8}") -print("-" * 55) -print(f"{'Sklearn':<12} {tau:<4} {sk_cov:<8.3f} {sk_time:<6.3f} " - f"{sk_pinball:<8.4f}") -print(f"{'QuantileHuber':<12} {tau:<4} {qh_cov:<8.3f} {qh_time:<6.3f} " - f"{qh_pinball:<8.4f}") +# %% +# Evaluate both methods +# --------------------- +# Coverage: fraction of true values below predictions (should ≈ tau) +# Pinball loss: standard quantile regression evaluation metric +# +# Note: No robust benchmarking conducted yet. The speed advantagous likely only +# shows on large-scale, high-dimensional datasets. The sklearn implementation is +# likely faster on small datasets. 
-quantiles = [0.1, 0.5, 0.9] -delta = 0.5 +def pinball_loss(residuals, quantile): + return np.mean(residuals * (quantile - (residuals < 0))) + + +sk_pred = sk_model.predict(X) +smooth_pred = smooth_model.predict(X) + +print(f"{'Method':<15} {'Coverage':<10} {'Time (s)':<10} {'Pinball Loss':<12}") +print("-" * 50) +print(f"{'Sklearn':<15} {np.mean(y <= sk_pred):<10.3f} {sk_time:<10.3f} " + f"{pinball_loss(y - sk_pred, tau):<12.4f}") +print(f"{'SmoothQuantile':<15} {np.mean(y <= smooth_pred):<10.3f} {smooth_time:<10.3f} " + f"{pinball_loss(y - smooth_pred, tau):<12.4f}") + +# %% +# Visualize the smooth approximation +# ---------------------------------- +# The smooth loss approximates the pinball loss but with continuous gradients + +fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) + +# Show loss and gradient for different quantile levels residuals = np.linspace(-3, 3, 500) -_, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) -for tau in quantiles: - qh = QuantileHuber(quantile=tau, delta=delta) +delta = 0.5 +quantiles = [0.1, 0.5, 0.9] + +for tau_val in quantiles: + qh = QuantileHuber(quantile=tau_val, delta=delta) loss = [qh._loss_sample(r) for r in residuals] grad = [qh._grad_per_sample(r) for r in residuals] - ax1.plot(residuals, loss, label=f"τ={tau}") - ax2.plot(residuals, grad, label=f"τ={tau}") -ax1.set_title("QuantileHuber Loss") + + # Compute pinball loss for each residual + pinball_loss = [r * (tau_val - (r < 0)) for r in residuals] + + # Plot smooth loss and pinball loss + ax1.plot(residuals, loss, label=f"τ={tau_val}", linewidth=2) + ax1.plot(residuals, pinball_loss, '--', alpha=0.4, color='gray', + label=f"Pinball τ={tau_val}") + ax2.plot(residuals, grad, label=f"τ={tau_val}", linewidth=2) + +# Add vertical lines and shading showing delta boundaries +for ax in [ax1, ax2]: + ax.axvline(-delta, color='gray', linestyle='--', alpha=0.7, linewidth=1.5) + ax.axvline(delta, color='gray', linestyle='--', alpha=0.7, linewidth=1.5) + # Add shading for quadratic region + ax.axvspan(-delta, delta, alpha=0.15, color='gray') + +# Add delta labels +ax1.text(-delta, 0.1, '−δ', ha='right', va='bottom', color='gray', fontsize=10) +ax1.text(delta, 0.1, '+δ', ha='left', va='bottom', color='gray', fontsize=10) + +ax1.set_title(f"Smooth Quantile Loss (δ={delta})", fontsize=12) ax1.set_xlabel("Residual") ax1.set_ylabel("Loss") -ax1.legend() -ax2.set_title("QuantileHuber Gradient") +ax1.legend(loc='upper left') +ax1.grid(True, alpha=0.3) + +ax2.set_title("Gradient (continuous everywhere)", fontsize=12) ax2.set_xlabel("Residual") ax2.set_ylabel("Gradient") -ax2.legend() +ax2.legend(loc='upper left') +ax2.grid(True, alpha=0.3) + plt.tight_layout() plt.show() + +# %% [markdown] +# The left plot shows the asymmetric loss: tau=0.1 penalizes overestimation more, +# while tau=0.9 penalizes underestimation. As delta decreases towards zero, the +# loss function approaches the standard pinball loss. +# The right plot reveals the key advantage: gradients transition smoothly through +# zero, unlike standard quantile regression which has a kink. This smoothing +# enables fast convergence with gradient-based solvers. 
diff --git a/skglm/experimental/__init__.py b/skglm/experimental/__init__.py index caa1aa819..a69927871 100644 --- a/skglm/experimental/__init__.py +++ b/skglm/experimental/__init__.py @@ -8,8 +8,8 @@ IterativeReweightedL1, PDCD_WS, Pinball, - SqrtQuadratic, - SqrtLasso, QuantileHuber, SmoothQuantileRegressor, + SqrtQuadratic, + SqrtLasso, ] diff --git a/skglm/experimental/quantile_huber.py b/skglm/experimental/quantile_huber.py index 2923c2642..b7c43f691 100644 --- a/skglm/experimental/quantile_huber.py +++ b/skglm/experimental/quantile_huber.py @@ -1,12 +1,14 @@ import numpy as np from numpy.linalg import norm from numba import float64 -from skglm.datafits.base import BaseDatafit + from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.exceptions import NotFittedError + +from skglm.datafits.base import BaseDatafit from skglm.solvers import AndersonCD from skglm.penalties import L1 from skglm.estimators import GeneralizedLinearEstimator -from sklearn.exceptions import NotFittedError class QuantileHuber(BaseDatafit): @@ -29,7 +31,7 @@ class QuantileHuber(BaseDatafit): quantile : float, default=0.5 Desired quantile level between 0 and 1. delta : float, default=1.0 - Width of quadratic region. + Smoothing parameter (0 means no smoothing).
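
Note for reviewers: the series is easier to follow once the continuation scheme behind SmoothQuantileRegressor.fit is spelled out. The sketch below is a minimal illustration of that idea using only the public pieces introduced in these patches (QuantileHuber, L1, AndersonCD, GeneralizedLinearEstimator); the geometric delta schedule, the in-place datafit update, and the small synthetic data are illustrative assumptions, not the estimator's exact internals.

    import numpy as np
    from sklearn.datasets import make_regression
    from skglm.penalties import L1
    from skglm.solvers import AndersonCD
    from skglm.estimators import GeneralizedLinearEstimator
    from skglm.experimental.quantile_huber import QuantileHuber

    X, y = make_regression(n_samples=200, n_features=20, random_state=0)
    tau, alpha = 0.8, 0.1

    # Decrease the smoothing width geometrically, e.g. 0.5 -> 1e-3 over 5 stages.
    deltas = np.geomspace(0.5, 1e-3, num=5)

    solver = AndersonCD(max_iter=1000, tol=1e-6, warm_start=True, fit_intercept=True)
    est = GeneralizedLinearEstimator(
        datafit=QuantileHuber(quantile=tau, delta=deltas[0]),
        penalty=L1(alpha=alpha), solver=solver)

    for delta in deltas:
        est.datafit.delta = float(delta)  # sharpen the approximation of the pinball loss
        est.fit(X, y)                     # warm start: each stage starts from the previous solution

    print(est.coef_)

Each stage is cheap because the previous solution is already close to the new optimum, and by the final, smallest delta the smoothed objective is a close proxy for the L1-penalized pinball loss, which is why the unit test in this series can compare the result against sklearn's QuantileRegressor.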