Skip to content

bpo-41431: Optimize dict_merge for copy #21669

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions Lib/test/test_ordered_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,20 +737,21 @@ def test_sizeof_exact(self):
size = support.calcobjsize
check = self.check_sizeof

basicsize = size('nQ2P' + '3PnPn2P') + calcsize('2nP2n')
basicsize = size('nQ2P' + '3PnPn2P')
keysize = calcsize('2nP2n')

entrysize = calcsize('n2P')
p = calcsize('P')
nodesize = calcsize('Pn2P')

od = OrderedDict()
check(od, basicsize + 8 + 5*entrysize) # 8byte indices + 8*2//3 * entry table
check(od, basicsize) # 8byte indices + 8*2//3 * entry table
od.x = 1
check(od, basicsize + 8 + 5*entrysize)
check(od, basicsize)
od.update([(i, i) for i in range(3)])
check(od, basicsize + 8*p + 8 + 5*entrysize + 3*nodesize)
check(od, basicsize + keysize + 8*p + 8 + 5*entrysize + 3*nodesize)
od.update([(i, i) for i in range(3, 10)])
check(od, basicsize + 16*p + 16 + 10*entrysize + 10*nodesize)
check(od, basicsize + keysize + 16*p + 16 + 10*entrysize + 10*nodesize)

check(od.keys(), size('P'))
check(od.items(), size('P'))
Expand Down
186 changes: 101 additions & 85 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -674,60 +674,6 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
}


/* Fast-copy a combined-table dict by memcpy'ing its entire keys object,
 * then taking a new reference to every live key and value.
 *
 * Preconditions (enforced by the asserts below): *orig* is an exact dict
 * (not a subclass), it uses a combined table (ma_values == NULL), and its
 * keys object is unshared (dk_refcnt == 1) so the refcount copied by
 * memcpy is already correct for the clone.
 *
 * Returns a new reference to the copy, or NULL with an exception set
 * (memory error from the allocation, or failure in new_dict()). */
static PyObject *
clone_combined_dict(PyDictObject *orig)
{
assert(PyDict_CheckExact(orig));
assert(orig->ma_values == NULL);
assert(orig->ma_keys->dk_refcnt == 1);

Py_ssize_t keys_size = _PyDict_KeysSize(orig->ma_keys);
PyDictKeysObject *keys = PyObject_Malloc(keys_size);
if (keys == NULL) {
PyErr_NoMemory();
return NULL;
}

/* Bitwise copy of the whole keys object: hash index + entry table. */
memcpy(keys, orig->ma_keys, keys_size);

/* After copying key/value pairs, we need to incref all
keys and values and they are about to be co-owned by a
new dict object.  Entries with a NULL value are dummies
(deleted slots) and hold no references. */
PyDictKeyEntry *ep0 = DK_ENTRIES(keys);
Py_ssize_t n = keys->dk_nentries;
for (Py_ssize_t i = 0; i < n; i++) {
PyDictKeyEntry *entry = &ep0[i];
PyObject *value = entry->me_value;
if (value != NULL) {
Py_INCREF(value);
Py_INCREF(entry->me_key);
}
}

PyDictObject *new = (PyDictObject *)new_dict(keys, NULL);
if (new == NULL) {
/* In case of an error, `new_dict()` takes care of
cleaning up `keys`. */
return NULL;
}
new->ma_used = orig->ma_used;
ASSERT_CONSISTENT(new);
if (_PyObject_GC_IS_TRACKED(orig)) {
/* Maintain tracking. */
_PyObject_GC_TRACK(new);
}

/* Since we copied the keys table we now have an extra reference
in the system. Manually call increment _Py_RefTotal to signal that
we have it now; calling dictkeys_incref would be an error as
keys->dk_refcnt is already set to 1 (after memcpy). */
#ifdef Py_REF_DEBUG
_Py_RefTotal++;
#endif

return (PyObject *)new;
}

PyObject *
PyDict_New(void)
{
Expand Down Expand Up @@ -2503,6 +2449,96 @@ PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
}

// Copy *src* dict into the empty *dst* dict, producing a compact
// combined-table copy (dummies from deleted slots are squeezed out and
// split tables are materialized).  Replaces dst's keys/values wholesale,
// so dst must be empty (see asserts for preconditions).
// Returns 0 on success, -1 with an exception set on memory error.
static int
dict_copy2(PyDictObject *dst, PyDictObject *src)
{
assert(dst->ma_used == 0);
assert(Py_TYPE(src)->tp_iter == (getiterfunc)dict_iter);

// Pick the smallest power-of-two table size that fits the estimate;
// keys_size wrapping to <= 0 means the shift overflowed (absurdly
// large request), which is reported as an out-of-memory condition.
Py_ssize_t estimate = ESTIMATE_SIZE(src->ma_used);
Py_ssize_t keys_size;
for (keys_size = PyDict_MINSIZE;
keys_size < estimate && keys_size > 0;
keys_size <<= 1)
;
if (keys_size <= 0) {
PyErr_NoMemory();
return -1;
}
PyDictKeysObject *keys = new_keys_object(keys_size);
if (keys == NULL) {
return -1;
}

// The copy is built without dummy entries, so a unicode-keyed or
// split source can use the faster no-dummy unicode lookup; otherwise
// inherit the source's lookup function.
if (src->ma_keys->dk_lookup == lookdict_unicode
|| src->ma_keys->dk_lookup == lookdict_split) {
keys->dk_lookup = lookdict_unicode_nodummy;
}
else {
keys->dk_lookup = src->ma_keys->dk_lookup;
}

// Copying entries.
PyDictKeyEntry *ep = DK_ENTRIES(src->ma_keys);
PyDictKeyEntry *newentries = DK_ENTRIES(keys);

if (src->ma_values) {
// split table. It must be dense: values[0..ma_used) are all set,
// so keys/hashes can be paired with values by index.
PyObject **values = src->ma_values;
for (Py_ssize_t i = 0; i < src->ma_used; i++) {
assert(values[i] != NULL);
Py_INCREF(values[i]);
Py_INCREF(ep[i].me_key);
newentries[i].me_key = ep[i].me_key;
newentries[i].me_hash = ep[i].me_hash;
newentries[i].me_value = values[i];
}
}
else { // combined table. it might be sparse: skip dummy
// (me_value == NULL) entries left by deletions so the
// copy comes out compact.
for (Py_ssize_t i = 0; i < src->ma_used; i++) {
while (ep->me_value == NULL)
ep++;
Py_INCREF(ep->me_key);
Py_INCREF(ep->me_value);
newentries[i] = *ep++;
}
}

// When src is clean and size is same, just use memcpy.
// ("clean" = combined table with no dummies, so the entry order and
// hash-index layout are identical to what build_indices would produce.)
if (keys_size == src->ma_keys->dk_size &&
src->ma_used == src->ma_keys->dk_nentries) {
size_t index_size = DK_SIZE(keys) * DK_IXSIZE(keys);
memcpy(keys->dk_indices, src->ma_keys->dk_indices, index_size);
}
else {
build_indices(keys, newentries, src->ma_used);
}

keys->dk_usable -= src->ma_used;
keys->dk_nentries = src->ma_used;

// Install the new combined table into dst, releasing whatever empty
// keys/values it held (dst was asserted empty, so nothing is lost).
dictkeys_decref(dst->ma_keys);
dst->ma_keys = keys;

if (dst->ma_values && dst->ma_values != empty_values) {
free_values(dst->ma_values);
}
dst->ma_values = NULL;
dst->ma_used = src->ma_used;
dst->ma_version_tag = DICT_NEXT_VERSION();

ASSERT_CONSISTENT(dst);

/* Maintain tracking. */
if (_PyObject_GC_IS_TRACKED(src) && !_PyObject_GC_IS_TRACKED(dst)) {
_PyObject_GC_TRACK(dst);
}
return 0;
}


static int
dict_merge(PyObject *a, PyObject *b, int override)
{
Expand All @@ -2527,12 +2563,13 @@ dict_merge(PyObject *a, PyObject *b, int override)
if (other == mp || other->ma_used == 0)
/* a.update(a) or a.update({}); nothing to do */
return 0;
if (mp->ma_used == 0)
if (mp->ma_used == 0) {
/* Since the target dict is empty, PyDict_GetItem()
* always returns NULL. Setting override to 1
* skips the unnecessary test.
* always returns NULL. And there is no duplicated key
* in b.
*/
override = 1;
return dict_copy2(mp, other);
}
/* Do one big resize at the start, rather than
* incrementally resizing as we insert new items. Expect
* that there will be no (or few) overlapping keys.
Expand Down Expand Up @@ -2718,30 +2755,10 @@ PyDict_Copy(PyObject *o)
return (PyObject *)split_copy;
}

if (PyDict_CheckExact(mp) && mp->ma_values == NULL &&
(mp->ma_used >= (mp->ma_keys->dk_nentries * 2) / 3))
{
/* Use fast-copy if:

(1) 'mp' is an instance of a subclassed dict; and

(2) 'mp' is not a split-dict; and

(3) if 'mp' is non-compact ('del' operation does not resize dicts),
do fast-copy only if it has at most 1/3 non-used keys.

The last condition (3) is important to guard against a pathological
case when a large dict is almost emptied with multiple del/pop
operations and copied after that. In cases like this, we defer to
PyDict_Merge, which produces a compacted copy.
*/
return clone_combined_dict(mp);
}

copy = PyDict_New();
if (copy == NULL)
return NULL;
if (PyDict_Merge(copy, o, 1) == 0)
if (dict_merge(copy, o, 1) == 0)
return copy;
Py_DECREF(copy);
return NULL;
Expand Down Expand Up @@ -3359,16 +3376,15 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
d = (PyDictObject *)self;

/* The object has been implicitly tracked by tp_alloc */
if (type == &PyDict_Type)
if (type == &PyDict_Type) {
_PyObject_GC_UNTRACK(d);
}

d->ma_used = 0;
d->ma_version_tag = DICT_NEXT_VERSION();
d->ma_keys = new_keys_object(PyDict_MINSIZE);
if (d->ma_keys == NULL) {
Py_DECREF(self);
return NULL;
}
dictkeys_incref(Py_EMPTY_KEYS);
d->ma_keys = Py_EMPTY_KEYS;
d->ma_values = empty_values;
ASSERT_CONSISTENT(d);
return self;
}
Expand Down