Moprescu/sparsedml #162

Merged
merged 8 commits into from
Nov 17, 2019
22 changes: 13 additions & 9 deletions econml/cate_estimator.py
@@ -11,7 +11,7 @@
from .bootstrap import BootstrapEstimator
from .inference import BootstrapInference
from .utilities import tensordot, ndim, reshape, shape
from .inference import StatsModelsInference, StatsModelsInferenceDiscrete
from .inference import StatsModelsInference, StatsModelsInferenceDiscrete, LinearModelFinalInference


class BaseCateEstimator(metaclass=abc.ABCMeta):
@@ -381,26 +381,21 @@ def effect(self, X=None, *, T0=0, T1=1):
effect.__doc__ = BaseCateEstimator.effect.__doc__


class StatsModelsCateEstimatorMixin(BaseCateEstimator):
class StatsModelsCateEstimatorMixin:

def _get_inference_options(self):
# add statsmodels to parent's options
options = super()._get_inference_options()
options.update(statsmodels=StatsModelsInference)
return options

@property
@abc.abstractmethod
def statsmodels(self):
pass

@property
def coef_(self):
return self.statsmodels.coef_
return self.model_final.coef_

@property
def intercept_(self):
return self.statsmodels.intercept_
return self.model_final.intercept_

@BaseCateEstimator._defer_to_inference
def coef__interval(self, *, alpha=0.1):
@@ -411,6 +406,15 @@ def intercept__interval(self, *, alpha=0.1):
pass


class DebiasedLassoCateEstimatorMixin(StatsModelsCateEstimatorMixin):

def _get_inference_options(self):
# add debiasedlasso to parent's options
options = super()._get_inference_options()
options.update(debiasedlasso=LinearModelFinalInference)
return options


class StatsModelsCateEstimatorDiscreteMixin(BaseCateEstimator):
# TODO Create parent StatsModelsCateEstimatorMixin class so that some functionalities can be shared

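A note on the refactoring above (not part of the diff): `StatsModelsCateEstimatorMixin` no longer inherits from `BaseCateEstimator` or requires an abstract `statsmodels` property; it reads the fitted `model_final` directly, and `DebiasedLassoCateEstimatorMixin` only has to contribute one extra inference option. A toy sketch of how the cooperative `super()` chain assembles the options dict — the string values here stand in for the real inference classes:

```python
# Toy sketch of the inference-option composition; the values are stand-in
# strings, not the actual inference classes from econml.inference.
class Base:
    def _get_inference_options(self):
        # BaseCateEstimator seeds the dict (e.g. with 'bootstrap')
        return {'bootstrap': 'BootstrapInference'}

class StatsModelsMixin:
    def _get_inference_options(self):
        options = super()._get_inference_options()
        options.update(statsmodels='StatsModelsInference')
        return options

class DebiasedLassoMixin(StatsModelsMixin):
    def _get_inference_options(self):
        options = super()._get_inference_options()
        options.update(debiasedlasso='LinearModelFinalInference')
        return options

class SparseEstimator(DebiasedLassoMixin, Base):
    pass

print(SparseEstimator()._get_inference_options())
# {'bootstrap': 'BootstrapInference', 'statsmodels': 'StatsModelsInference',
#  'debiasedlasso': 'LinearModelFinalInference'}
```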
89 changes: 81 additions & 8 deletions econml/dml.py
@@ -15,6 +15,7 @@
from .utilities import (shape, reshape, ndim, hstack, cross_product, transpose, inverse_onehot,
broadcast_unit_treatments, reshape_treatmentwise_effects,
StatsModelsLinearRegression, LassoCVWrapper)
from econml.sklearn_extensions.linear_model import MultiOutputDebiasedLasso
from sklearn.model_selection import KFold, StratifiedKFold, check_cv
from sklearn.linear_model import LinearRegression, LassoCV
from sklearn.preprocessing import (PolynomialFeatures, LabelEncoder, OneHotEncoder,
@@ -23,7 +24,8 @@
from sklearn.pipeline import Pipeline
from sklearn.utils import check_random_state
from .cate_estimator import (BaseCateEstimator, LinearCateEstimator,
TreatmentExpansionMixin, StatsModelsCateEstimatorMixin)
TreatmentExpansionMixin, StatsModelsCateEstimatorMixin,
DebiasedLassoCateEstimatorMixin)
from .inference import StatsModelsInference
from ._rlearner import _RLearner

@@ -299,12 +301,15 @@ def statsmodels(self):
return self.model_final


class SparseLinearDMLCateEstimator(DMLCateEstimator):
class SparseLinearDMLCateEstimator(DebiasedLassoCateEstimatorMixin, DMLCateEstimator):
"""
A specialized version of the Double ML estimator for the sparse linear case.

Specifically, this estimator can be used when the controls are high-dimensional
and the coefficients of the nuisance functions are sparse.
This estimator should be used when the features of heterogeneity are high-dimensional
and the coefficients of the linear CATE function are sparse.

The final stage is an instance of the
:class:`MultiOutputDebiasedLasso <econml.sklearn_extensions.linear_model.MultiOutputDebiasedLasso>`.

Parameters
----------
@@ -316,9 +321,21 @@ class SparseLinearDMLCateEstimator(DMLCateEstimator):
The estimator for fitting the treatment to the features. Must implement
`fit` and `predict` methods, and must be a linear model for correctness.

model_final: estimator, optional (default is :class:`LassoCV(fit_intercept=False) <sklearn.linear_model.LassoCV>`)
The estimator for fitting the response residuals to the treatment residuals. Must implement
`fit` and `predict` methods, and must be a linear model with no intercept for correctness.
alpha : string | float, optional, default='auto'
CATE L1 regularization applied through the debiased lasso in the final model.
'auto' corresponds to the cross-validated form of the :class:`MultiOutputDebiasedLasso`.

max_iter : int, optional, default=1000
The maximum number of iterations in the Debiased Lasso.

tol : float, optional, default=1e-4
The tolerance for the optimization: if the updates are
smaller than ``tol``, the optimization code checks the
dual gap for optimality and continues until it is smaller
than ``tol``.

positive : bool, optional, default=False
When set to ``True``, forces the coefficients of the DebiasedLasso to be positive.

featurizer: transformer, optional
(default is :class:`PolynomialFeatures(degree=1, include_bias=True) <sklearn.preprocessing.PolynomialFeatures>`)
@@ -356,12 +373,22 @@ class SparseLinearDMLCateEstimator(DMLCateEstimator):
"""

def __init__(self,
model_y=LassoCV(), model_t=LassoCV(), model_final=LassoCVWrapper(fit_intercept=False),
model_y=LassoCV(), model_t=LassoCV(),
alpha='auto',
max_iter=1000,
tol=1e-4,
positive=False,
featurizer=PolynomialFeatures(degree=1, include_bias=True),
linear_first_stages=True,
discrete_treatment=False,
n_splits=2,
random_state=None):
model_final = MultiOutputDebiasedLasso(
alpha=alpha,
fit_intercept=False,
max_iter=max_iter,
tol=tol,
positive=positive)
super().__init__(model_y=model_y,
model_t=model_t,
model_final=model_final,
@@ -371,6 +398,52 @@ def __init__(self,
n_splits=n_splits,
random_state=random_state)

def fit(self, Y, T, X=None, W=None, sample_weight=None, inference=None):
"""
Estimate the counterfactual model from data, i.e. estimates functions τ(·,·,·), ∂τ(·,·).

Parameters
----------
Y: (n × d_y) matrix or vector of length n
Outcomes for each sample
T: (n × dₜ) matrix or vector of length n
Treatments for each sample
X: optional (n × dₓ) matrix
Features for each sample
W: optional (n × d_w) matrix
Controls for each sample
sample_weight: optional (n,) vector
Weights for each row
inference: string, `Inference` instance, or None
Method for performing inference. This estimator supports 'bootstrap'
(or an instance of :class:`.BootstrapInference`) and 'debiasedlasso'
(or an instance of :class:`.LinearModelFinalInference`)

Returns
-------
self
"""
# TODO: support sample_var
if sample_weight is not None and inference is not None:
warn("This estimator does not yet support sample variances and inference does not take "
"sample variances into account. This feature will be supported in a future release.")
self._check_sparsity(X, T)
return super().fit(Y, T, X=X, W=W, sample_weight=sample_weight, sample_var=None, inference=inference)

def _check_sparsity(self, X, T):
# Check that the final model is high-dimensional enough for a sparse estimator
if X is None:
d_x = 1
else:
d_x = clone(self.featurizer, safe=False).fit_transform(X[[0], :]).shape[1]
if self._discrete_treatment:
d_t = len(set(T.flatten())) - 1
else:
d_t = 1 if np.ndim(T) < 2 else T.shape[1]
if d_x * d_t < 5:
warn("The number of features in the final model (< 5) is too small for a sparse model. "
"We recommend using the LinearDMLCateEstimator for this low-dimensional setting.", UserWarning)


class KernelDMLCateEstimator(LinearDMLCateEstimator):
"""
69 changes: 37 additions & 32 deletions econml/inference.py
@@ -3,6 +3,7 @@

import abc
import numpy as np
from scipy.stats import norm
from .bootstrap import BootstrapEstimator
from .utilities import cross_product, broadcast_unit_treatments, reshape_treatmentwise_effects, ndim

@@ -60,37 +61,13 @@ def wrapped(*args, alpha=0.1, **kwargs):
return wrapped


class StatsModelsInference(Inference):
"""
Stores statsmodels covariance options.

This class can be used for inference by the LinearDMLCateEstimator.

Any estimator that supports this method of inference must implement a `statsmodels`
property that returns a :class:`.StatsModelsLinearRegression` instance and a `featurizer` property that returns a
preprocessing featurizer for the X variable.

Parameters
----------
cov_type : string, optional (default 'HC1')
The type of covariance estimation method to use. Supported values are 'nonrobust',
'HC0', 'HC1'.
class LinearModelFinalInference(Inference):

TODO Create parent StatsModelsInference class so that some functionalities can be shared
"""

def __init__(self, cov_type='HC1'):
if cov_type not in ['nonrobust', 'HC0', 'HC1']:
raise ValueError("Unsupported cov_type; "
"must be one of 'nonrobust', "
"'HC0', 'HC1'")

self.cov_type = cov_type
def __init__(self):
pass

def prefit(self, estimator, *args, **kwargs):
self.statsmodels = estimator.statsmodels
# need to set the fit args before the estimator is fit
self.statsmodels.cov_type = self.cov_type
self.model_final = estimator.model_final
self.featurizer = estimator.featurizer if hasattr(estimator, 'featurizer') else None

def fit(self, estimator, *args, **kwargs):
@@ -106,23 +83,51 @@ def effect_interval(self, X, *, T0, T1, alpha=0.1):
X = np.ones((T0.shape[0], 1))
elif self.featurizer is not None:
X = self.featurizer.transform(X)
return self.statsmodels.predict_interval(cross_product(X, T1 - T0), alpha=alpha)
return self._predict_interval(cross_product(X, T1 - T0), alpha=alpha)

def const_marginal_effect_interval(self, X, *, alpha=0.1):
if X is None:
X = np.ones((1, 1))
elif self.featurizer is not None:
X = self.featurizer.fit_transform(X)
X, T = broadcast_unit_treatments(X, self._d_t[0] if self._d_t else 1)
preds = self.statsmodels.predict_interval(cross_product(X, T), alpha=alpha)
preds = self._predict_interval(cross_product(X, T), alpha=alpha)
return tuple(reshape_treatmentwise_effects(pred, self._d_t, self._d_y)
for pred in preds)

def coef__interval(self, *, alpha=0.1):
return self.statsmodels.coef__interval(alpha)
return self.model_final.coef__interval(alpha)

def intercept__interval(self, *, alpha=0.1):
return self.statsmodels.intercept__interval(alpha)
return self.model_final.intercept__interval(alpha)

def _predict_interval(self, X, alpha):
return self.model_final.predict_interval(X, alpha=alpha)
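
`LinearModelFinalInference` is deliberately duck-typed: `prefit` grabs `estimator.model_final`, and everything else is delegated to that model's `predict_interval`, `coef__interval`, and `intercept__interval`. A hypothetical minimal model satisfying that contract — not part of econml, shown only to pin down the expected interface — using the normal quantiles now imported at the top of this file:

```python
import numpy as np
from scipy.stats import norm

class ToyOLSFinalModel:
    """Hypothetical final model exposing the interface that
    LinearModelFinalInference delegates to."""

    def fit(self, X, y):
        # OLS with no intercept, matching the fit_intercept=False final stage
        self.coef_, *_ = np.linalg.lstsq(X, y, rcond=None)
        resid = y - X @ self.coef_
        sigma2 = resid @ resid / (X.shape[0] - X.shape[1])
        self._cov = sigma2 * np.linalg.inv(X.T @ X)  # classical OLS covariance
        self.intercept_ = 0.0
        return self

    def predict_interval(self, X, alpha=0.1):
        pred = X @ self.coef_
        se = np.sqrt(np.einsum('ij,jk,ik->i', X, self._cov, X))  # diag(X S X')
        z = norm.ppf(1 - alpha / 2)
        return pred - z * se, pred + z * se

    def coef__interval(self, alpha=0.1):
        se = np.sqrt(np.diag(self._cov))
        z = norm.ppf(1 - alpha / 2)
        return self.coef_ - z * se, self.coef_ + z * se

    def intercept__interval(self, alpha=0.1):
        return self.intercept_, self.intercept_      # degenerate: no intercept
```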


class StatsModelsInference(LinearModelFinalInference):
"""Stores statsmodels covariance options.

This class can be used for inference by the LinearDMLCateEstimator.

Parameters
----------
cov_type : string, optional (default 'HC1')
The type of covariance estimation method to use. Supported values are 'nonrobust',
'HC0', 'HC1'.
"""

def __init__(self, cov_type='HC1'):
if cov_type not in ['nonrobust', 'HC0', 'HC1']:
raise ValueError("Unsupported cov_type; "
"must be one of 'nonrobust', "
"'HC0', 'HC1'")

self.cov_type = cov_type

def prefit(self, estimator, *args, **kwargs):
super().prefit(estimator, *args, **kwargs)
self.model_final.cov_type = self.cov_type
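
Usage sketch (synthetic data, illustrative settings): passing `inference='statsmodels'` uses the default HC1 covariance, while an explicit `StatsModelsInference` instance selects another `cov_type`:

```python
import numpy as np
from econml.dml import LinearDMLCateEstimator
from econml.inference import StatsModelsInference

np.random.seed(0)
n = 1000
X = np.random.normal(size=(n, 3))
T = X[:, 0] + np.random.normal(size=n)
# Heteroskedastic noise, where a robust covariance estimate matters
Y = 2.0 * T + X[:, 1] + (1 + np.abs(X[:, 0])) * np.random.normal(size=n)

est = LinearDMLCateEstimator()
est.fit(Y, T, X, inference=StatsModelsInference(cov_type='HC0'))
lb, ub = est.effect_interval(X[:10], T0=0, T1=1, alpha=0.05)
```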


class StatsModelsInferenceDiscrete(Inference):