From 9f3861271e7a5000d80864fa9e7a783a4a8d5a8b Mon Sep 17 00:00:00 2001
From: TomAugspurger <thomas-augspurger@uiowa.edu>
Date: Thu, 6 Feb 2014 15:41:22 -0600
Subject: [PATCH 1/2] BUG: preserve dtypes in interpolate

---
 doc/source/release.rst       |  7 +++-
 doc/source/v0.14.0.txt       |  3 ++
 pandas/core/generic.py       |  6 +--
 pandas/tests/test_generic.py | 79 ++++++++++++++++++++++++++++--------
 vb_suite/frame_methods.py    | 26 ++++++++++++
 5 files changed, 98 insertions(+), 23 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 6e1632f036f38..31d3b88094d37 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -66,7 +66,8 @@ API Changes
 - ``df['col'] = value`` and ``df.loc[:,'col'] = value`` are now completely equivalent;
   previously the ``.loc`` would not necessarily coerce the dtype of the resultant series (:issue:`6149`)
 - ``dtypes`` and ``ftypes`` now return a series with ``dtype=object`` on empty containers (:issue:`5740`)
-
+- The ``interpolate`` ``downcast`` keyword default has been changed from ``infer`` to
+  ``None``. This is to preserve the original dtype unless explicitly requested otherwise (:issue:`6290`).
 
 Experimental Features
 ~~~~~~~~~~~~~~~~~~~~~
@@ -115,12 +116,16 @@ Bug Fixes
 - TimeGrouper has a more compatible API to the rest of the groupers (e.g. ``groups`` was missing) (:issue:`3881`)
 - Bug in ``pd.eval`` when parsing strings with possible tokens like ``'&'``
   (:issue:`6351`)
+<<<<<<< HEAD
 - Bug correctly handle placements of ``-inf`` in Panels when dividing by integer 0 (:issue:`6178`)
 - ``DataFrame.shift`` with ``axis=1`` was raising (:issue:`6371`)
 - Disabled clipboard tests until release time (run locally with ``nosetests -A disabled`` (:issue:`6048`).
 - Bug in ``DataFrame.replace()`` when passing a nested ``dict`` that contained
   keys not in the values to be replaced (:issue:`6342`)
 - Bug in take with duplicate columns not consolidated (:issue:`6240`)
+=======
+- Bug in interpolate changing dtypes (:issue:`6290`)
+>>>>>>> 336b309... BUG: preserve dtypes in interpolate
 
 pandas 0.13.1
 -------------
diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
index 58ae5084c4827..a3839542dafcc 100644
--- a/doc/source/v0.14.0.txt
+++ b/doc/source/v0.14.0.txt
@@ -29,6 +29,9 @@ API changes
       df.iloc[:,2:3]
       df.iloc[:,1:3]
 
+- The ``DataFrame.interpolate()`` ``downcast`` keyword default has been changed from ``infer`` to
+  ``None``. This is to preserve the original dtype unless explicitly requested otherwise (:issue:`6290`).
+
 MultiIndexing Using Slicers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index f8dbe079610c0..b9ffeb636615b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2435,7 +2435,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
             return self._constructor(new_data).__finalize__(self)
 
     def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
-                    downcast='infer', **kwargs):
+                    downcast=None, **kwargs):
         """
         Interpolate values according to different methods.
 
@@ -2468,7 +2468,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
             Maximum number of consecutive NaNs to fill.
         inplace : bool, default False
             Update the NDFrame in place if possible.
-        downcast : optional, 'infer' or None, defaults to 'infer'
+        downcast : optional, 'infer' or None, defaults to None
             Downcast dtypes if possible.
 
         Returns
@@ -2492,7 +2492,6 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
         dtype: float64
 
         """
-
         if self.ndim > 2:
             raise NotImplementedError("Interpolate has not been implemented "
                                       "on Panel and Panel 4D objects.")
@@ -2534,7 +2533,6 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
                                           inplace=inplace,
                                           downcast=downcast,
                                           **kwargs)
-
         if inplace:
             if axis == 1:
                 self._update_inplace(new_data)
diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index d694efff9b351..7e4b23b633477 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -459,7 +459,10 @@ def test_interpolate(self):
         self.assert_numpy_array_equal(time_interp, ord_ts)
 
         # try time interpolation on a non-TimeSeries
-        self.assertRaises(ValueError, self.series.interpolate, method='time')
+        # Only raises ValueError if there are NaNs.
+        non_ts = self.series.copy()
+        non_ts[0] = np.NaN
+        self.assertRaises(ValueError, non_ts.interpolate, method='time')
 
     def test_interp_regression(self):
         _skip_if_no_scipy()
@@ -512,7 +515,7 @@ def test_interpolate_non_ts(self):
     def test_nan_interpolate(self):
         s = Series([0, 1, np.nan, 3])
         result = s.interpolate()
-        expected = Series([0, 1, 2, 3])
+        expected = Series([0., 1., 2., 3.])
         assert_series_equal(result, expected)
 
         _skip_if_no_scipy()
@@ -522,20 +525,20 @@ def test_nan_interpolate(self):
     def test_nan_irregular_index(self):
         s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9])
         result = s.interpolate()
-        expected = Series([1, 2, 3, 4], index=[1, 3, 5, 9])
+        expected = Series([1., 2., 3., 4.], index=[1, 3, 5, 9])
         assert_series_equal(result, expected)
 
     def test_nan_str_index(self):
         s = Series([0, 1, 2, np.nan], index=list('abcd'))
         result = s.interpolate()
-        expected = Series([0, 1, 2, 2], index=list('abcd'))
+        expected = Series([0., 1., 2., 2.], index=list('abcd'))
         assert_series_equal(result, expected)
 
     def test_interp_quad(self):
         _skip_if_no_scipy()
         sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4])
         result = sq.interpolate(method='quadratic')
-        expected = Series([1, 4, 9, 16], index=[1, 2, 3, 4])
+        expected = Series([1., 4., 9., 16.], index=[1, 2, 3, 4])
         assert_series_equal(result, expected)
 
     def test_interp_scipy_basic(self):
@@ -545,18 +548,30 @@ def test_interp_scipy_basic(self):
         expected = Series([1., 3., 7.5, 12., 18.5, 25.])
         result = s.interpolate(method='slinear')
         assert_series_equal(result, expected)
+
+        result = s.interpolate(method='slinear', downcast='infer')
+        assert_series_equal(result, expected)
         # nearest
         expected = Series([1, 3, 3, 12, 12, 25])
         result = s.interpolate(method='nearest')
+        assert_series_equal(result, expected.astype('float'))
+
+        result = s.interpolate(method='nearest', downcast='infer')
         assert_series_equal(result, expected)
         # zero
         expected = Series([1, 3, 3, 12, 12, 25])
         result = s.interpolate(method='zero')
+        assert_series_equal(result, expected.astype('float'))
+
+        result = s.interpolate(method='zero', downcast='infer')
         assert_series_equal(result, expected)
         # quadratic
         expected = Series([1, 3., 6.769231, 12., 18.230769, 25.])
         result = s.interpolate(method='quadratic')
         assert_series_equal(result, expected)
+
+        result = s.interpolate(method='quadratic', downcast='infer')
+        assert_series_equal(result, expected)
         # cubic
         expected = Series([1., 3., 6.8, 12., 18.2, 25.])
         result = s.interpolate(method='cubic')
@@ -585,7 +600,6 @@ def test_interp_multiIndex(self):
 
         expected = s.copy()
         expected.loc[2] = 2
-        expected = expected.astype(np.int64)
         result = s.interpolate()
         assert_series_equal(result, expected)
 
@@ -595,7 +609,7 @@ def test_interp_multiIndex(self):
 
     def test_interp_nonmono_raise(self):
         _skip_if_no_scipy()
-        s = pd.Series([1, 2, 3], index=[0, 2, 1])
+        s = Series([1, np.nan, 3], index=[0, 2, 1])
         with tm.assertRaises(ValueError):
             s.interpolate(method='krogh')
 
@@ -603,7 +617,7 @@ def test_interp_datetime64(self):
         _skip_if_no_scipy()
         df = Series([1, np.nan, 3], index=date_range('1/1/2000', periods=3))
         result = df.interpolate(method='nearest')
-        expected = Series([1, 1, 3], index=date_range('1/1/2000', periods=3))
+        expected = Series([1., 1., 3.], index=date_range('1/1/2000', periods=3))
         assert_series_equal(result, expected)
 
 class TestDataFrame(tm.TestCase, Generic):
@@ -639,7 +653,7 @@ def test_get_numeric_data_preserve_dtype(self):
     def test_interp_basic(self):
         df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [1, 4, 9, np.nan],
                         'C': [1, 2, 3, 5], 'D': list('abcd')})
-        expected = DataFrame({'A': [1, 2, 3, 4], 'B': [1, 4, 9, 9],
+        expected = DataFrame({'A': [1., 2., 3., 4.], 'B': [1., 4., 9., 9.],
                               'C': [1, 2, 3, 5], 'D': list('abcd')})
         result = df.interpolate()
         assert_frame_equal(result, expected)
@@ -648,8 +662,6 @@ def test_interp_basic(self):
         expected = df.set_index('C')
         expected.A.loc[3] = 3
         expected.B.loc[5] = 9
-        expected[['A', 'B']] = expected[['A', 'B']].astype(np.int64)
-
         assert_frame_equal(result, expected)
 
     def test_interp_bad_method(self):
@@ -663,9 +675,14 @@ def test_interp_combo(self):
                         'C': [1, 2, 3, 5], 'D': list('abcd')})
 
         result = df['A'].interpolate()
+        expected = Series([1., 2., 3., 4.])
+        assert_series_equal(result, expected)
+
+        result = df['A'].interpolate(downcast='infer')
         expected = Series([1, 2, 3, 4])
         assert_series_equal(result, expected)
 
+
     def test_interp_nan_idx(self):
         df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [np.nan, 2, 3, 4]})
         df = df.set_index('A')
@@ -722,13 +739,16 @@ def test_interp_alt_scipy(self):
         expected = df.copy()
         expected['A'].iloc[2] = 3
         expected['A'].iloc[5] = 6
+        assert_frame_equal(result, expected)
+
+        result = df.interpolate(method='barycentric', downcast='infer')
         assert_frame_equal(result, expected.astype(np.int64))
 
         result = df.interpolate(method='krogh')
         expectedk = df.copy()
-        expectedk['A'].iloc[2] = 3
-        expectedk['A'].iloc[5] = 6
-        expectedk['A'] = expected['A'].astype(np.int64)
+        # expectedk['A'].iloc[2] = 3
+        # expectedk['A'].iloc[5] = 6
+        expectedk['A'] = expected['A']
         assert_frame_equal(result, expectedk)
 
         _skip_if_no_pchip()
@@ -786,9 +806,32 @@ def test_interp_raise_on_only_mixed(self):
 
     def test_interp_inplace(self):
         df = DataFrame({'a': [1., 2., np.nan, 4.]})
-        expected = DataFrame({'a': [1, 2, 3, 4]})
-        df['a'].interpolate(inplace=True)
-        assert_frame_equal(df, expected)
+        expected = DataFrame({'a': [1., 2., 3., 4.]})
+        result = df.copy()
+        result['a'].interpolate(inplace=True)
+        assert_frame_equal(result, expected)
+
+        result = df.copy()
+        result['a'].interpolate(inplace=True, downcast='infer')
+        assert_frame_equal(result, expected.astype('int'))
+
+    def test_interp_ignore_all_good(self):
+        # GH 6290
+        df = DataFrame({'A': [1, 2, np.nan, 4],
+                        'B': [1, 2, 3, 4],
+                        'C': [1., 2., np.nan, 4.],
+                        'D': [1., 2., 3., 4.]})
+        expected = DataFrame({'A': np.array([1, 2, 3, 4], dtype='float'),
+                              'B': np.array([1, 2, 3, 4], dtype='int'),
+                              'C': np.array([1., 2., 3, 4.], dtype='float'),
+                              'D': np.array([1., 2., 3., 4.], dtype='float')})
+
+        result = df.interpolate(downcast=None)
+        assert_frame_equal(result, expected)
+
+        # all good
+        result = df[['B', 'D']].interpolate(downcast=None)
+        assert_frame_equal(result, df[['B', 'D']])
 
     def test_no_order(self):
         _skip_if_no_scipy()
@@ -802,7 +845,7 @@ def test_spline(self):
         _skip_if_no_scipy()
         s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
         result = s.interpolate(method='spline', order=1)
-        expected = Series([1, 2, 3, 4, 5, 6, 7])
+        expected = Series([1., 2., 3., 4., 5., 6., 7.])
         assert_series_equal(result, expected)
 
     def test_metadata_propagation_indiv(self):
diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py
index e658ce75247b4..a70d756c82b0a 100644
--- a/vb_suite/frame_methods.py
+++ b/vb_suite/frame_methods.py
@@ -403,3 +403,29 @@ def test_unequal(name):
 frame_object_unequal = Benchmark('test_unequal("object_df")', setup)
 frame_nonunique_unequal = Benchmark('test_unequal("nonunique_cols")', setup)
 
+#-----------------------------------------------------------------------------
+# interpolate
+# this is the worst case, where every column has NaNs.
+setup = common_setup + """
+df = DataFrame(randn(10000, 100))
+df.values[::2] = np.nan
+"""
+
+frame_interpolate = Benchmark('df.interpolate()', setup,
+                               start_date=datetime(2014, 2, 7))
+
+setup = common_setup + """
+df = DataFrame({'A': np.arange(0, 10000),
+                'B': np.random.randint(0, 100, 10000),
+                'C': randn(10000),
+                'D': randn(10000)})
+df.loc[1::5, 'A'] = np.nan
+df.loc[1::5, 'C'] = np.nan
+"""
+
+frame_interpolate_some_good = Benchmark('df.interpolate()', setup,
+                                        start_date=datetime(2014, 2, 7))
+frame_interpolate_some_good_infer = Benchmark('df.interpolate(downcast="infer")',
+                                              setup,
+                                              start_date=datetime(2014, 2, 7))
+

From 8d8d7a3cc6fd3a8e761be77f62e3b621fd63a306 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <thomas-augspurger@uiowa.edu>
Date: Fri, 14 Feb 2014 10:36:23 -0600
Subject: [PATCH 2/2] check in interp_with_fill too

---
 doc/source/release.rst   |  3 ---
 pandas/core/internals.py | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 31d3b88094d37..965ee1dc8e8d9 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -116,16 +116,13 @@ Bug Fixes
 - TimeGrouper has a more compatible API to the rest of the groupers (e.g. ``groups`` was missing) (:issue:`3881`)
 - Bug in ``pd.eval`` when parsing strings with possible tokens like ``'&'``
   (:issue:`6351`)
-<<<<<<< HEAD
 - Bug correctly handle placements of ``-inf`` in Panels when dividing by integer 0 (:issue:`6178`)
 - ``DataFrame.shift`` with ``axis=1`` was raising (:issue:`6371`)
 - Disabled clipboard tests until release time (run locally with ``nosetests -A disabled`` (:issue:`6048`).
 - Bug in ``DataFrame.replace()`` when passing a nested ``dict`` that contained
   keys not in the values to be replaced (:issue:`6342`)
 - Bug in take with duplicate columns not consolidated (:issue:`6240`)
-=======
 - Bug in interpolate changing dtypes (:issue:`6290`)
->>>>>>> 336b309... BUG: preserve dtypes in interpolate
 
 pandas 0.13.1
 -------------
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index c89aac0fa7923..e68fc8da6a5db 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -805,6 +805,15 @@ def interpolate(self, method='pad', axis=0, index=None,
                     values=None, inplace=False, limit=None,
                     fill_value=None, coerce=False, downcast=None, **kwargs):
 
+        def check_int_bool(self, inplace):
+            # Only FloatBlocks will contain NaNs.
+            # timedelta subclasses IntBlock
+            if (self.is_bool or self.is_integer) and not self.is_timedelta:
+                if inplace:
+                    return self
+                else:
+                    return self.copy()
+
         # a fill na type method
         try:
             m = com._clean_fill_method(method)
@@ -812,6 +821,9 @@ def interpolate(self, method='pad', axis=0, index=None,
             m = None
 
         if m is not None:
+            r = check_int_bool(self, inplace)
+            if r is not None:
+                return r
             return self._interpolate_with_fill(method=m,
                                                axis=axis,
                                                inplace=inplace,
@@ -826,6 +838,9 @@ def interpolate(self, method='pad', axis=0, index=None,
             m = None
 
         if m is not None:
+            r = check_int_bool(self, inplace)
+            if r is not None:
+                return r
             return self._interpolate(method=m,
                                      index=index,
                                      values=values,