From ce5a5c432679b6d6268a2982aa11c668e27bf01b Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Tue, 21 Sep 2021 14:04:54 -0700
Subject: [PATCH 1/6] WIP: slice SparseArray via its sparse index instead of take()

---
 pandas/core/arrays/sparse/array.py | 26 +++++++++++++++++---------
 pandas/core/internals/blocks.py    |  3 +++
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 8e92114d7b3de..06f46e3c62bf1 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -892,13 +892,21 @@ def __getitem__(
         elif isinstance(key, tuple):
             data_slice = self.to_dense()[key]
         elif isinstance(key, slice):
-            # special case to preserve dtypes
-            if key == slice(None):
-                return self.copy()
-            # TODO: this logic is surely elsewhere
-            # TODO: this could be more efficient
-            indices = np.arange(len(self), dtype=np.int32)[key]
-            return self.take(indices)
+            start = 0 if key.start is None else key.start
+            if start < 0:
+                start += len(self)
+
+            end = len(self) if key.step is None else key.stop
+            if end < 0:
+                end += len(self)
+
+            indices = self.sp_index.to_int_index().indices
+
+            keep_inds = np.flatnonzero((indices >= start) & (indices <= end))
+            sp_vals = self.sp_values[keep_inds]
+            sp_index = indices[keep_inds]
+            new_sp_index = make_sparse_index(len(self), sp_index, self.kind)
+            return type(self)._simple_new(sp_vals, new_sp_index, self.dtype)
         else:
             # TODO: I think we can avoid densifying when masking a
             # boolean SparseArray with another. Need to look at the
@@ -1762,10 +1770,10 @@ def make_sparse_index(length: int, indices, kind: Literal["integer"]) -> IntInde
 
 def make_sparse_index(length: int, indices, kind: SparseIndexKind) -> SparseIndex:
     index: SparseIndex
-    if kind == "block" or isinstance(kind, BlockIndex):
+    if kind == "block":
         locs, lens = splib.get_blocks(indices)
         index = BlockIndex(length, locs, lens)
-    elif kind == "integer" or isinstance(kind, IntIndex):
+    elif kind == "integer":
         index = IntIndex(length, indices)
     else:  # pragma: no cover
         raise ValueError("must be block or integer type")
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index da7ffbf08c34b..32d32a200efff 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -2082,6 +2082,9 @@ def to_native_types(
     """convert to our native types format"""
     values = ensure_wrapped_if_datetimelike(values)
 
+    if is_sparse(values):
+        values = values.to_dense()
+
     if isinstance(values, (DatetimeArray, TimedeltaArray)):
         result = values._format_native_types(na_rep=na_rep, **kwargs)
         result = result.astype(object, copy=False)

From 9a641afe5b769cbbd274a865828ff007d450ca72 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 27 Sep 2021 20:17:15 -0700
Subject: [PATCH 2/6] WIP: limit sparse slice fast path to step=1, fix bounds, add tests

---
 doc/source/whatsnew/v1.4.0.rst           |  1 +
 pandas/core/arrays/sparse/array.py       | 44 +++++++++++++++-------
 pandas/tests/arrays/sparse/test_array.py | 48 +++++++++++++++---------
 3 files changed, 63 insertions(+), 30 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 7b10a0f39bdbd..6dcca6eaeacf9 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -354,6 +354,7 @@ Performance improvements
 - Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:43370`)
 - Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)
 - :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`)
+- Indexing into a :class:`SparseArray` with a ``slice`` with ``step=1`` no longer requires converting to a dense array (:issue:`?`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 06f46e3c62bf1..9ff279d3ca99c 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -892,21 +892,39 @@ def __getitem__(
         elif isinstance(key, tuple):
             data_slice = self.to_dense()[key]
         elif isinstance(key, slice):
-            start = 0 if key.start is None else key.start
-            if start < 0:
-                start += len(self)
 
-            end = len(self) if key.step is None else key.stop
-            if end < 0:
-                end += len(self)
-
-            indices = self.sp_index.to_int_index().indices
+            # Avoid densifying when handling contiguous slices
+            if key.step is None or key.step == 1:
+                start = 0 if key.start is None else key.start
+                if start < 0:
+                    start += len(self)
+
+                end = len(self) if key.stop is None else key.stop
+                if end < 0:
+                    end += len(self)
+
+                indices = self.sp_index.to_int_index().indices
+                keep_inds = np.flatnonzero((indices >= start) & (indices < end))
+                sp_vals = self.sp_values[keep_inds]
+
+                sp_index = indices[keep_inds].copy()
+
+                # If we've sliced to not include the start of the array, all our indices
+                # should be shifted. NB: here we are careful to also not shift by a
+                # negative value for a case like [0, 1][-100:] where the start index
+                # should be treated like 0
+                if start > 0:
+                    sp_index -= start
+
+                # Length of our result should match applying this slice to a range
+                # of the length of our original array
+                new_len = len(range(len(self))[key])
+                new_sp_index = make_sparse_index(new_len, sp_index, self.kind)
+                return type(self)._simple_new(sp_vals, new_sp_index, self.dtype)
+            else:
+                indices = np.arange(len(self), dtype=np.int32)[key]
+                return self.take(indices)
 
-            keep_inds = np.flatnonzero((indices >= start) & (indices <= end))
-            sp_vals = self.sp_values[keep_inds]
-            sp_index = indices[keep_inds]
-            new_sp_index = make_sparse_index(len(self), sp_index, self.kind)
-            return type(self)._simple_new(sp_vals, new_sp_index, self.dtype)
         else:
             # TODO: I think we can avoid densifying when masking a
             # boolean SparseArray with another. Need to look at the
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
index 8c64c5bb3a055..e4d85b138bb69 100644
--- a/pandas/tests/arrays/sparse/test_array.py
+++ b/pandas/tests/arrays/sparse/test_array.py
@@ -679,23 +679,37 @@ def test_getitem_arraylike_mask(self):
         expected = SparseArray([0, 2])
         tm.assert_sp_array_equal(result, expected)
 
-    def test_getslice(self):
-        result = self.arr[:-3]
-        exp = SparseArray(self.arr.to_dense()[:-3])
-        tm.assert_sp_array_equal(result, exp)
-
-        result = self.arr[-4:]
-        exp = SparseArray(self.arr.to_dense()[-4:])
-        tm.assert_sp_array_equal(result, exp)
-
-        # two corner cases from Series
-        result = self.arr[-12:]
-        exp = SparseArray(self.arr)
-        tm.assert_sp_array_equal(result, exp)
-
-        result = self.arr[:-12]
-        exp = SparseArray(self.arr.to_dense()[:0])
-        tm.assert_sp_array_equal(result, exp)
+    @pytest.mark.parametrize(
+        "slc",
+        [
+            np.s_[:],
+            np.s_[1:10],
+            np.s_[1:100],
+            np.s_[10:1],
+            np.s_[:-3],
+            np.s_[-5:-4],
+            np.s_[:-12],
+            np.s_[-12:],
+            np.s_[2:],
+            np.s_[2::3],
+            np.s_[::2],
+            np.s_[::-1],
+            np.s_[::-2],
+            np.s_[1:6:2],
+            np.s_[:-6:-2],
+        ],
+    )
+    @pytest.mark.parametrize(
+        "as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []]
+    )
+    def test_getslice(self, slc, as_dense):
+        as_dense = np.array(as_dense)
+        arr = SparseArray(as_dense)
+
+        result = arr[slc]
+        expected = SparseArray(as_dense[slc])
+
+        tm.assert_sp_array_equal(result, expected)
 
     def test_getslice_tuple(self):
         dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

From 79cebfcbfa8ef2d5c8cc3247fd34a6dbf2d1ddad Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 27 Sep 2021 20:39:56 -0700
Subject: [PATCH 3/6] Add benchmarks

---
 asv_bench/benchmarks/sparse.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
index ff1c4c92fe551..80587bcad4c6b 100644
--- a/asv_bench/benchmarks/sparse.py
+++ b/asv_bench/benchmarks/sparse.py
@@ -9,6 +9,8 @@
 )
 from pandas.arrays import SparseArray
 
+from .pandas_vb_common import BaseIO
+
 
 def make_array(size, dense_proportion, fill_value, dtype):
     dense_size = int(size * dense_proportion)
@@ -105,6 +107,22 @@ def time_to_coo(self):
         self.df.sparse.to_coo()
 
 
+class ToCSV(BaseIO):
+    fname = "__test__.csv"
+
+    def setup(self):
+        N = 500_000
+        sp_arr = SparseArray(make_array(N, 1e-5, np.nan, np.float64))
+        self.ser = Series(sp_arr)
+        self.df = pd.DataFrame({"A": self.ser, "B": self.ser.copy()})
+
+    def time_to_csv_series(self):
+        self.ser.to_csv(self.fname)
+
+    def time_to_csv_frame(self):
+        self.df.to_csv(self.fname)
+
+
 class Arithmetic:
 
     params = ([0.1, 0.01], [0, np.nan])
@@ -195,4 +213,17 @@ def time_take(self, indices, allow_fill):
         self.sp_arr.take(indices, allow_fill=allow_fill)
 
 
+class GetItem:
+    def setup(self):
+        N = 1_000_000
+        arr = make_array(N, 1e-5, np.nan, np.float64)
+        self.sp_arr = SparseArray(arr)
+
+    def time_integer_indexing(self):
+        self.sp_arr[78]
+
+    def time_slice(self):
+        self.sp_arr[1:]
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip

From 2182afa32209c3d1102e18b193a590a10fcc739d Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 27 Sep 2021 20:42:37 -0700
Subject: [PATCH 4/6] Fix accidental commit: drop sparse densify from to_native_types

---
 pandas/core/internals/blocks.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 3fdc6920bdaa9..002473a1a5fb2 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -2078,9 +2078,6 @@ def to_native_types(
     """convert to our native types format"""
     values = ensure_wrapped_if_datetimelike(values)
 
-    if is_sparse(values):
-        values = values.to_dense()
-
     if isinstance(values, (DatetimeArray, TimedeltaArray)):
         result = values._format_native_types(na_rep=na_rep, **kwargs)
         result = result.astype(object, copy=False)

From 5622f636b1e677fd4eb5a0add8a807bfd99312bd Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 27 Sep 2021 20:47:35 -0700
Subject: [PATCH 5/6] Remove slow benchmark to update later

---
 asv_bench/benchmarks/sparse.py | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
index 80587bcad4c6b..8f424685dfbae 100644
--- a/asv_bench/benchmarks/sparse.py
+++ b/asv_bench/benchmarks/sparse.py
@@ -9,8 +9,6 @@
 )
 from pandas.arrays import SparseArray
 
-from .pandas_vb_common import BaseIO
-
 
 def make_array(size, dense_proportion, fill_value, dtype):
     dense_size = int(size * dense_proportion)
@@ -107,22 +105,6 @@ def time_to_coo(self):
         self.df.sparse.to_coo()
 
 
-class ToCSV(BaseIO):
-    fname = "__test__.csv"
-
-    def setup(self):
-        N = 500_000
-        sp_arr = SparseArray(make_array(N, 1e-5, np.nan, np.float64))
-        self.ser = Series(sp_arr)
-        self.df = pd.DataFrame({"A": self.ser, "B": self.ser.copy()})
-
-    def time_to_csv_series(self):
-        self.ser.to_csv(self.fname)
-
-    def time_to_csv_frame(self):
-        self.df.to_csv(self.fname)
-
-
 class Arithmetic:
 
     params = ([0.1, 0.01], [0, np.nan])

From f393d0c46fb2c11be0447d2e7a77068dd9f50715 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Mon, 27 Sep 2021 20:52:59 -0700
Subject: [PATCH 6/6] Add pr number

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 9c6bf37888b2d..d65d21878a584 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -357,7 +357,7 @@ Performance improvements
 - Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:43370`)
 - Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)
 - :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`)
-- Indexing into a :class:`SparseArray` with a ``slice`` with ``step=1`` no longer requires converting to a dense array (:issue:`?`)
+- Indexing into a :class:`SparseArray` with a ``slice`` with ``step=1`` no longer requires converting to a dense array (:issue:`43777`)
 - Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`43654`)
 - Performance improvement in :meth:`.Rolling.mean` and :meth:`.Expanding.mean` with ``engine="numba"`` (:issue:`43612`)
 -