diff --git a/Include/internal/pycore_range.h b/Include/internal/pycore_range.h
index 809e89a1e01b60..dcaeb0f8c618ce 100644
--- a/Include/internal/pycore_range.h
+++ b/Include/internal/pycore_range.h
@@ -10,10 +10,9 @@ extern "C" {
 
 typedef struct {
     PyObject_HEAD
-    long index;
     long start;
+    long stop;
     long step;
-    long len;
 } _PyRangeIterObject;
 
 #ifdef __cplusplus
diff --git a/Lib/test/test_range.py b/Lib/test/test_range.py
index 851ad5b7c2f485..7be76b32ac2935 100644
--- a/Lib/test/test_range.py
+++ b/Lib/test/test_range.py
@@ -407,11 +407,7 @@ def test_iterator_pickling_overflowing_index(self):
         for proto in range(pickle.HIGHEST_PROTOCOL + 1):
             with self.subTest(proto=proto):
                 it = iter(range(2**32 + 2))
-                _, _, idx = it.__reduce__()
-                self.assertEqual(idx, 0)
-                it.__setstate__(2**32 + 1)  # undocumented way to set r->index
-                _, _, idx = it.__reduce__()
-                self.assertEqual(idx, 2**32 + 1)
+                it.__setstate__(2**32 + 1)  # undocumented way to advance an iterator
                 d = pickle.dumps(it, proto)
                 it = pickle.loads(d)
                 self.assertEqual(next(it), 2**32 + 1)
@@ -442,6 +438,38 @@ def test_large_exhausted_iterator_pickling(self):
             self.assertEqual(list(i), [])
             self.assertEqual(list(i2), [])
 
+    def test_iterator_unpickle_compat(self):
+        testcases = [
+            b'c__builtin__\niter\n(c__builtin__\nxrange\n(I10\nI20\nI2\ntRtRI2\nb.',
+            b'c__builtin__\niter\n(c__builtin__\nxrange\n(K\nK\x14K\x02tRtRK\x02b.',
+            b'\x80\x02c__builtin__\niter\nc__builtin__\nxrange\nK\nK\x14K\x02\x87R\x85RK\x02b.',
+            b'\x80\x03cbuiltins\niter\ncbuiltins\nrange\nK\nK\x14K\x02\x87R\x85RK\x02b.',
+            b'\x80\x04\x951\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x8c\x04iter\x93\x8c\x08builtins\x8c\x05range\x93K\nK\x14K\x02\x87R\x85RK\x02b.',
+
+            b'c__builtin__\niter\n(c__builtin__\nxrange\n(L-36893488147419103232L\nI20\nI2\ntRtRL18446744073709551623L\nb.',
+            b'c__builtin__\niter\n(c__builtin__\nxrange\n(L-36893488147419103232L\nK\x14K\x02tRtRL18446744073709551623L\nb.',
+            b'\x80\x02c__builtin__\niter\nc__builtin__\nxrange\n\x8a\t\x00\x00\x00\x00\x00\x00\x00\x00\xfeK\x14K\x02\x87R\x85R\x8a\t\x07\x00\x00\x00\x00\x00\x00\x00\x01b.',
+            b'\x80\x03cbuiltins\niter\ncbuiltins\nrange\n\x8a\t\x00\x00\x00\x00\x00\x00\x00\x00\xfeK\x14K\x02\x87R\x85R\x8a\t\x07\x00\x00\x00\x00\x00\x00\x00\x01b.',
+            b'\x80\x04\x95C\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x8c\x04iter\x93\x8c\x08builtins\x8c\x05range\x93\x8a\t\x00\x00\x00\x00\x00\x00\x00\x00\xfeK\x14K\x02\x87R\x85R\x8a\t\x07\x00\x00\x00\x00\x00\x00\x00\x01b.',
+        ]
+        for t in testcases:
+            it = pickle.loads(t)
+            self.assertEqual(list(it), [14, 16, 18])
+
+    def test_iterator_setstate(self):
+        it = iter(range(10, 20, 2))
+        it.__setstate__(2)
+        self.assertEqual(list(it), [14, 16, 18])
+        it = reversed(range(10, 20, 2))
+        it.__setstate__(3)
+        self.assertEqual(list(it), [12, 10])
+        it = iter(range(-2**65, 20, 2))
+        it.__setstate__(2**64 + 7)
+        self.assertEqual(list(it), [14, 16, 18])
+        it = reversed(range(10, 2**65, 2))
+        it.__setstate__(2**64 - 7)
+        self.assertEqual(list(it), [12, 10])
+
     def test_odd_bug(self):
         # This used to raise a "SystemError: NULL result without error"
         # because the range validation step was eating the exception
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 2403c7c815f2c0..17a5026e2571e1 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -1484,7 +1484,8 @@ def delx(self): del self.__x
         # PyCapsule
         # XXX
         # rangeiterator
-        check(iter(range(1)), size('4l'))
+        check(iter(range(1)), size('3l'))
+        check(iter(range(2**65)), size('3P'))
         # reverse
         check(reversed(''), size('nP'))
         # range
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-08-29-15-55-19.bpo-45026.z7nTA3.rst b/Misc/NEWS.d/next/Core and Builtins/2021-08-29-15-55-19.bpo-45026.z7nTA3.rst
new file mode 100644
index 00000000000000..481ab53e4f5197
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-08-29-15-55-19.bpo-45026.z7nTA3.rst	
@@ -0,0 +1,3 @@
+Optimize the :class:`range` object iterator. It is now smaller, and
+iteration of ranges containing large numbers is faster. Pickles of range
+iterators are smaller, and unpickling them is faster.
diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c
index a889aa04db81f0..fe2998a61c0a7e 100644
--- a/Objects/rangeobject.c
+++ b/Objects/rangeobject.c
@@ -753,21 +753,50 @@ PyTypeObject PyRange_Type = {
    in the normal case, but possible for any numeric value.
 */
 
+/* Return number of items in range (lo, hi, step).  step != 0
+ * required.  The result always fits in an unsigned long.
+ */
+static unsigned long
+get_len_of_range(long lo, long hi, long step)
+{
+    /* -------------------------------------------------------------
+    If step > 0 and lo >= hi, or step < 0 and lo <= hi, the range is empty.
+    Else for step > 0, if n values are in the range, the last one is
+    lo + (n-1)*step, which must be <= hi-1.  Rearranging,
+    n <= (hi - lo - 1)/step + 1, so taking the floor of the RHS gives
+    the proper value.  Since lo < hi in this case, hi-lo-1 >= 0, so
+    the RHS is non-negative and so truncation is the same as the
+    floor.  Letting M be the largest positive long, the worst case
+    for the RHS numerator is hi=M, lo=-M-1, and then
+    hi-lo-1 = M-(-M-1)-1 = 2*M.  Therefore unsigned long has enough
+    precision to compute the RHS exactly.  The analysis for step < 0
+    is similar.
+    ---------------------------------------------------------------*/
+    assert(step != 0);
+    if (step > 0 && lo < hi)
+        return 1UL + (hi - 1UL - lo) / step;
+    else if (step < 0 && lo > hi)
+        return 1UL + (lo - 1UL - hi) / (0UL - step);
+    else
+        return 0UL;
+}
+
 static PyObject *
 rangeiter_next(_PyRangeIterObject *r)
 {
-    if (r->index < r->len)
-        /* cast to unsigned to avoid possible signed overflow
-           in intermediate calculations. */
-        return PyLong_FromLong((long)(r->start +
-                                      (unsigned long)(r->index++) * r->step));
+    long result = r->start, step = r->step;
+    if (step > 0 ? result < r->stop : result > r->stop) {
+        r->start = result + step;
+        return PyLong_FromLong(result);
+    }
     return NULL;
 }
 
 static PyObject *
 rangeiter_len(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored))
 {
-    return PyLong_FromLong(r->len - r->index);
+    unsigned long ulen = get_len_of_range(r->start, r->stop, r->step);
+    return PyLong_FromUnsignedLong(ulen);
 }
 
 PyDoc_STRVAR(length_hint_doc,
@@ -783,7 +812,7 @@ rangeiter_reduce(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored))
     start = PyLong_FromLong(r->start);
     if (start == NULL)
         goto err;
-    stop = PyLong_FromLong(r->start + r->len * r->step);
+    stop = PyLong_FromLong(r->stop);
     if (stop == NULL)
         goto err;
     step = PyLong_FromLong(r->step);
@@ -794,8 +823,8 @@ rangeiter_reduce(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored))
     if (range == NULL)
         goto err;
     /* return the result */
-    return Py_BuildValue(
-            "N(N)l", _PyEval_GetBuiltin(&_Py_ID(iter)), range, r->index);
+    return Py_BuildValue("N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)),
+                         range, Py_None);
 err:
     Py_XDECREF(start);
     Py_XDECREF(stop);
@@ -812,9 +841,12 @@ rangeiter_setstate(_PyRangeIterObject *r, PyObject *state)
     /* silently clip the index value */
     if (index < 0)
         index = 0;
-    else if (index > r->len)
-        index = r->len; /* exhausted iterator */
-    r->index = index;
+    else {
+        unsigned long ulen = get_len_of_range(r->start, r->stop, r->step);
+        if ((unsigned long)index > ulen)
+            index = (long)ulen; /* exhausted iterator */
+    }
+    r->start += index * r->step;
     Py_RETURN_NONE;
 }
 
@@ -864,34 +896,6 @@ PyTypeObject PyRangeIter_Type = {
         0,                                      /* tp_members */
 };
 
-/* Return number of items in range (lo, hi, step).  step != 0
- * required.  The result always fits in an unsigned long.
- */
-static unsigned long
-get_len_of_range(long lo, long hi, long step)
-{
-    /* -------------------------------------------------------------
-    If step > 0 and lo >= hi, or step < 0 and lo <= hi, the range is empty.
-    Else for step > 0, if n values are in the range, the last one is
-    lo + (n-1)*step, which must be <= hi-1.  Rearranging,
-    n <= (hi - lo - 1)/step + 1, so taking the floor of the RHS gives
-    the proper value.  Since lo < hi in this case, hi-lo-1 >= 0, so
-    the RHS is non-negative and so truncation is the same as the
-    floor.  Letting M be the largest positive long, the worst case
-    for the RHS numerator is hi=M, lo=-M-1, and then
-    hi-lo-1 = M-(-M-1)-1 = 2*M.  Therefore unsigned long has enough
-    precision to compute the RHS exactly.  The analysis for step < 0
-    is similar.
-    ---------------------------------------------------------------*/
-    assert(step != 0);
-    if (step > 0 && lo < hi)
-        return 1UL + (hi - 1UL - lo) / step;
-    else if (step < 0 && lo > hi)
-        return 1UL + (lo - 1UL - hi) / (0UL - step);
-    else
-        return 0UL;
-}
-
 /* Initialize a rangeiter object.  If the length of the rangeiter object
    is not representable as a C long, OverflowError is raised. */
 
@@ -902,52 +906,42 @@ fast_range_iter(long start, long stop, long step, long len)
     if (it == NULL)
         return NULL;
     it->start = start;
+    it->stop = stop;
     it->step = step;
-    it->len = len;
-    it->index = 0;
     return (PyObject *)it;
 }
 
 typedef struct {
     PyObject_HEAD
-    PyObject *index;
     PyObject *start;
+    PyObject *stop;
     PyObject *step;
-    PyObject *len;
 } longrangeiterobject;
 
 static PyObject *
 longrangeiter_len(longrangeiterobject *r, PyObject *no_args)
 {
-    return PyNumber_Subtract(r->len, r->index);
+    return compute_range_length(r->start, r->stop, r->step);
 }
 
 static PyObject *
 longrangeiter_reduce(longrangeiterobject *r, PyObject *Py_UNUSED(ignored))
 {
-    PyObject *product, *stop=NULL;
     PyObject *range;
 
-    /* create a range object for pickling.  Must calculate the "stop" value */
-    product = PyNumber_Multiply(r->len, r->step);
-    if (product == NULL)
-        return NULL;
-    stop = PyNumber_Add(r->start, product);
-    Py_DECREF(product);
-    if (stop ==  NULL)
-        return NULL;
+    /* create a range object for pickling. */
     range =  (PyObject*)make_range_object(&PyRange_Type,
-                               Py_NewRef(r->start), stop, Py_NewRef(r->step));
+                Py_NewRef(r->start), Py_NewRef(r->stop), Py_NewRef(r->step));
     if (range == NULL) {
         Py_DECREF(r->start);
-        Py_DECREF(stop);
+        Py_DECREF(r->stop);
         Py_DECREF(r->step);
         return NULL;
     }
 
     /* return the result */
-    return Py_BuildValue(
-            "N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)), range, r->index);
+    return Py_BuildValue("N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)),
+                         range, Py_None);
 }
 
 static PyObject *
@@ -957,20 +951,34 @@ longrangeiter_setstate(longrangeiterobject *r, PyObject *state)
     int cmp;
 
     /* clip the value */
-    cmp = PyObject_RichCompareBool(state, zero, Py_LT);
+    cmp = PyObject_RichCompareBool(state, zero, Py_LE);
     if (cmp < 0)
         return NULL;
     if (cmp > 0) {
-        state = zero;
+        Py_RETURN_NONE;
     }
-    else {
-        cmp = PyObject_RichCompareBool(r->len, state, Py_LT);
-        if (cmp < 0)
-            return NULL;
-        if (cmp > 0)
-            state = r->len;
+    PyObject *length = compute_range_length(r->start, r->stop, r->step);
+    if (length == NULL) {
+        return NULL;
+    }
+    cmp = PyObject_RichCompareBool(length, state, Py_LE);
+    if (cmp < 0) {
+        Py_DECREF(length);
+        return NULL;
     }
-    Py_XSETREF(r->index, Py_NewRef(state));
+    if (cmp > 0) {
+        state = length;
+    }
+    PyObject *product = PyNumber_Multiply(state, r->step);
+    Py_DECREF(length);
+    if (product == NULL) {
+        return NULL;
+    }
+    PyObject *new_start = PyNumber_Add(r->start, product);
+    Py_DECREF(product);
+    if (new_start == NULL)
+        return NULL;
+    Py_SETREF(r->start, new_start);
     Py_RETURN_NONE;
 }
 
@@ -987,39 +995,25 @@ static PyMethodDef longrangeiter_methods[] = {
 static void
 longrangeiter_dealloc(longrangeiterobject *r)
 {
-    Py_XDECREF(r->index);
     Py_XDECREF(r->start);
+    Py_XDECREF(r->stop);
     Py_XDECREF(r->step);
-    Py_XDECREF(r->len);
     PyObject_Free(r);
 }
 
 static PyObject *
 longrangeiter_next(longrangeiterobject *r)
 {
-    PyObject *product, *new_index, *result;
-    if (PyObject_RichCompareBool(r->index, r->len, Py_LT) != 1)
+    int s = _PyLong_Sign(r->step);
+    if (PyObject_RichCompareBool(r->start, r->stop, s > 0 ? Py_LT : Py_GT) != 1)
         return NULL;
 
-    new_index = PyNumber_Add(r->index, _PyLong_GetOne());
-    if (!new_index)
+    PyObject *new_start = PyNumber_Add(r->start, r->step);
+    if (new_start == NULL) {
         return NULL;
-
-    product = PyNumber_Multiply(r->index, r->step);
-    if (!product) {
-        Py_DECREF(new_index);
-        return NULL;
-    }
-
-    result = PyNumber_Add(r->start, product);
-    Py_DECREF(product);
-    if (result) {
-        Py_SETREF(r->index, new_index);
     }
-    else {
-        Py_DECREF(new_index);
-    }
-
+    PyObject *result = r->start;
+    r->start = new_start;
     return result;
 }
 
@@ -1106,9 +1100,8 @@ range_iter(PyObject *seq)
         return NULL;
 
     it->start = Py_NewRef(r->start);
+    it->stop = Py_NewRef(r->stop);
     it->step = Py_NewRef(r->step);
-    it->len = Py_NewRef(r->length);
-    it->index = Py_NewRef(_PyLong_GetZero());
     return (PyObject *)it;
 }
 
@@ -1117,7 +1110,7 @@ range_reverse(PyObject *seq, PyObject *Py_UNUSED(ignored))
 {
     rangeobject *range = (rangeobject*) seq;
     longrangeiterobject *it;
-    PyObject *sum, *diff, *product;
+    PyObject *product;
     long lstart, lstop, lstep, new_start, new_stop;
     unsigned long ulen;
 
@@ -1186,23 +1179,20 @@ range_reverse(PyObject *seq, PyObject *Py_UNUSED(ignored))
     it = PyObject_New(longrangeiterobject, &PyLongRangeIter_Type);
     if (it == NULL)
         return NULL;
-    it->index = it->start = it->step = NULL;
+    it->start = it->step = NULL;
 
-    /* start + (len - 1) * step */
-    it->len = Py_NewRef(range->length);
-
-    diff = PyNumber_Subtract(it->len, _PyLong_GetOne());
-    if (!diff)
+    /* new_stop = start - step */
+    it->stop = PyNumber_Subtract(range->start, range->step);
+    if (!it->stop)
         goto create_failure;
 
-    product = PyNumber_Multiply(diff, range->step);
-    Py_DECREF(diff);
+    /* new_start = new_stop + len * step */
+    product = PyNumber_Multiply(range->length, range->step);
     if (!product)
         goto create_failure;
 
-    sum = PyNumber_Add(range->start, product);
+    it->start = PyNumber_Add(it->stop, product);
     Py_DECREF(product);
-    it->start = sum;
     if (!it->start)
         goto create_failure;
 
@@ -1210,7 +1200,6 @@ range_reverse(PyObject *seq, PyObject *Py_UNUSED(ignored))
     if (!it->step)
         goto create_failure;
 
-    it->index = Py_NewRef(_PyLong_GetZero());
     return (PyObject *)it;
 
 create_failure:
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index a1f910da8ed54a..75cdfdbf7ba0ce 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2620,14 +2620,14 @@ dummy_func(
             STAT_INC(FOR_ITER, hit);
             _Py_CODEUNIT next = next_instr[INLINE_CACHE_ENTRIES_FOR_ITER];
             assert(_PyOpcode_Deopt[_Py_OPCODE(next)] == STORE_FAST);
-            if (r->index >= r->len) {
+            long value = r->start, step = r->step;
+            if (step > 0 ? value >= r->stop : value <= r->stop) {
                 STACK_SHRINK(1);
                 Py_DECREF(r);
                 JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1);
             }
             else {
-                long value = (long)(r->start +
-                                    (unsigned long)(r->index++) * r->step);
+                r->start = value + step;
                 if (_PyLong_AssignValue(&GETLOCAL(_Py_OPARG(next)), value) < 0) {
                     goto error;
                 }
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index ae8fdd5e99c3dc..d92b42e5253a3a 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -2638,14 +2638,14 @@
             STAT_INC(FOR_ITER, hit);
             _Py_CODEUNIT next = next_instr[INLINE_CACHE_ENTRIES_FOR_ITER];
             assert(_PyOpcode_Deopt[_Py_OPCODE(next)] == STORE_FAST);
-            if (r->index >= r->len) {
+            long value = r->start, step = r->step;
+            if (step > 0 ? value >= r->stop : value <= r->stop) {
                 STACK_SHRINK(1);
                 Py_DECREF(r);
                 JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1);
             }
             else {
-                long value = (long)(r->start +
-                                    (unsigned long)(r->index++) * r->step);
+                r->start = value + step;
                 if (_PyLong_AssignValue(&GETLOCAL(_Py_OPARG(next)), value) < 0) {
                     goto error;
                 }