From 97f2089e7b730ad49af3e495264eb2e2f99002db Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 27 Jul 2020 18:09:39 +0900 Subject: [PATCH 1/3] Use empty key-sharing dict in dict_new --- Lib/test/test_ordered_dict.py | 11 ++++++----- Objects/dictobject.c | 11 +++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py index fdea44e4d85965..8af99e98e0adee 100644 --- a/Lib/test/test_ordered_dict.py +++ b/Lib/test/test_ordered_dict.py @@ -737,20 +737,21 @@ def test_sizeof_exact(self): size = support.calcobjsize check = self.check_sizeof - basicsize = size('nQ2P' + '3PnPn2P') + calcsize('2nP2n') + basicsize = size('nQ2P' + '3PnPn2P') + keysize = calcsize('2nP2n') entrysize = calcsize('n2P') p = calcsize('P') nodesize = calcsize('Pn2P') od = OrderedDict() - check(od, basicsize + 8 + 5*entrysize) # 8byte indices + 8*2//3 * entry table + check(od, basicsize) # 8byte indices + 8*2//3 * entry table od.x = 1 - check(od, basicsize + 8 + 5*entrysize) + check(od, basicsize) od.update([(i, i) for i in range(3)]) - check(od, basicsize + 8*p + 8 + 5*entrysize + 3*nodesize) + check(od, basicsize + keysize + 8*p + 8 + 5*entrysize + 3*nodesize) od.update([(i, i) for i in range(3, 10)]) - check(od, basicsize + 16*p + 16 + 10*entrysize + 10*nodesize) + check(od, basicsize + keysize + 16*p + 16 + 10*entrysize + 10*nodesize) check(od.keys(), size('P')) check(od.items(), size('P')) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index ba22489539ae7d..3370d7c5fbe6af 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3359,16 +3359,15 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds) d = (PyDictObject *)self; /* The object has been implicitly tracked by tp_alloc */ - if (type == &PyDict_Type) + if (type == &PyDict_Type) { _PyObject_GC_UNTRACK(d); + } d->ma_used = 0; d->ma_version_tag = DICT_NEXT_VERSION(); - d->ma_keys = new_keys_object(PyDict_MINSIZE); - if (d->ma_keys == NULL) { - Py_DECREF(self); - return NULL; - } + dictkeys_incref(Py_EMPTY_KEYS); + d->ma_keys = Py_EMPTY_KEYS; + d->ma_values = empty_values; ASSERT_CONSISTENT(d); return self; } From cf4f61ce50e07f7ccd3aef991647050c8da058f9 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 28 Jul 2020 13:53:25 +0900 Subject: [PATCH 2/3] Add dict_copy2 --- Objects/dictobject.c | 89 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 4 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3370d7c5fbe6af..ceec17a6056128 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2503,6 +2503,86 @@ PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override) return Py_SAFE_DOWNCAST(i, Py_ssize_t, int); } +// Copy *src* dict into empty *dst* dict. +// See asserts for preconditions. +static int +dict_copy2(PyDictObject *dst, PyDictObject *src) +{ + assert(dst->ma_used == 0); + assert(Py_TYPE(src)->tp_iter == (getiterfunc)dict_iter); + + Py_ssize_t estimate = ESTIMATE_SIZE(src->ma_used); + Py_ssize_t keys_size; + for (keys_size = PyDict_MINSIZE; + keys_size < estimate && keys_size > 0; + keys_size <<= 1) + ; + if (keys_size <= 0) { + PyErr_NoMemory(); + return -1; + } + PyDictKeysObject *keys = new_keys_object(keys_size); + if (keys == NULL) { + return -1; + } + + if (src->ma_keys->dk_lookup == lookdict_unicode + || src->ma_keys->dk_lookup == lookdict_split) { + keys->dk_lookup = lookdict_unicode_nodummy; + } + else { + keys->dk_lookup = src->ma_keys->dk_lookup; + } + + // Copying entries. + PyDictKeyEntry *ep = DK_ENTRIES(src->ma_keys); + PyDictKeyEntry *newentries = DK_ENTRIES(keys); + + if (src->ma_values) { + // split table. It must be dense. + PyObject **values = src->ma_values; + for (Py_ssize_t i = 0; i < src->ma_used; i++) { + assert(values[i] != NULL); + Py_INCREF(values[i]); + Py_INCREF(ep[i].me_key); + newentries[i].me_key = ep[i].me_key; + newentries[i].me_hash = ep[i].me_hash; + newentries[i].me_value = values[i]; + } + } + else { // combined table. it might be sparce. + for (Py_ssize_t i = 0; i < src->ma_used; i++) { + while (ep->me_value == NULL) + ep++; + Py_INCREF(ep->me_key); + Py_INCREF(ep->me_value); + newentries[i] = *ep++; + } + } + + build_indices(keys, newentries, src->ma_used); + keys->dk_usable -= src->ma_used; + keys->dk_nentries = src->ma_used; + + dictkeys_decref(dst->ma_keys); + dst->ma_keys = keys; + + if (dst->ma_values && dst->ma_values != empty_values) { + free_values(dst->ma_values); + } + dst->ma_values = NULL; + dst->ma_used = src->ma_used; + dst->ma_version_tag = DICT_NEXT_VERSION(); + + ASSERT_CONSISTENT(dst); + + /* Maintain tracking. */ + if (_PyObject_GC_IS_TRACKED(src) && !_PyObject_GC_IS_TRACKED(dst)) { + _PyObject_GC_TRACK(dst); + } + return 0; +} + static int dict_merge(PyObject *a, PyObject *b, int override) { @@ -2527,12 +2607,13 @@ dict_merge(PyObject *a, PyObject *b, int override) if (other == mp || other->ma_used == 0) /* a.update(a) or a.update({}); nothing to do */ return 0; - if (mp->ma_used == 0) + if (mp->ma_used == 0) { /* Since the target dict is empty, PyDict_GetItem() - * always returns NULL. Setting override to 1 - * skips the unnecessary test. + * always returns NULL. And there is no duplicated key + * in b. */ - override = 1; + return dict_copy2(mp, other); + } /* Do one big resize at the start, rather than * incrementally resizing as we insert new items. Expect * that there will be no (or few) overlapping keys. From 4ac880baed561b8393d44ffe9629cb5129c8c60d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 29 Jul 2020 11:28:05 +0900 Subject: [PATCH 3/3] Remove clone_combined_dict --- Objects/dictobject.c | 88 ++++++-------------------------------------- 1 file changed, 12 insertions(+), 76 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index ceec17a6056128..e23156994ad161 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -674,60 +674,6 @@ new_dict_with_shared_keys(PyDictKeysObject *keys) } -static PyObject * -clone_combined_dict(PyDictObject *orig) -{ - assert(PyDict_CheckExact(orig)); - assert(orig->ma_values == NULL); - assert(orig->ma_keys->dk_refcnt == 1); - - Py_ssize_t keys_size = _PyDict_KeysSize(orig->ma_keys); - PyDictKeysObject *keys = PyObject_Malloc(keys_size); - if (keys == NULL) { - PyErr_NoMemory(); - return NULL; - } - - memcpy(keys, orig->ma_keys, keys_size); - - /* After copying key/value pairs, we need to incref all - keys and values and they are about to be co-owned by a - new dict object. */ - PyDictKeyEntry *ep0 = DK_ENTRIES(keys); - Py_ssize_t n = keys->dk_nentries; - for (Py_ssize_t i = 0; i < n; i++) { - PyDictKeyEntry *entry = &ep0[i]; - PyObject *value = entry->me_value; - if (value != NULL) { - Py_INCREF(value); - Py_INCREF(entry->me_key); - } - } - - PyDictObject *new = (PyDictObject *)new_dict(keys, NULL); - if (new == NULL) { - /* In case of an error, `new_dict()` takes care of - cleaning up `keys`. */ - return NULL; - } - new->ma_used = orig->ma_used; - ASSERT_CONSISTENT(new); - if (_PyObject_GC_IS_TRACKED(orig)) { - /* Maintain tracking. */ - _PyObject_GC_TRACK(new); - } - - /* Since we copied the keys table we now have an extra reference - in the system. Manually call increment _Py_RefTotal to signal that - we have it now; calling dictkeys_incref would be an error as - keys->dk_refcnt is already set to 1 (after memcpy). */ -#ifdef Py_REF_DEBUG - _Py_RefTotal++; -#endif - - return (PyObject *)new; -} - PyObject * PyDict_New(void) { @@ -2560,7 +2506,16 @@ dict_copy2(PyDictObject *dst, PyDictObject *src) } } - build_indices(keys, newentries, src->ma_used); + // When src is clean and size is same, just use memcpy. + if (keys_size == src->ma_keys->dk_size && + src->ma_used == src->ma_keys->dk_nentries) { + size_t index_size = DK_SIZE(keys) * DK_IXSIZE(keys); + memcpy(keys->dk_indices, src->ma_keys->dk_indices, index_size); + } + else { + build_indices(keys, newentries, src->ma_used); + } + keys->dk_usable -= src->ma_used; keys->dk_nentries = src->ma_used; @@ -2583,6 +2538,7 @@ dict_copy2(PyDictObject *dst, PyDictObject *src) return 0; } + static int dict_merge(PyObject *a, PyObject *b, int override) { @@ -2799,30 +2755,10 @@ PyDict_Copy(PyObject *o) return (PyObject *)split_copy; } - if (PyDict_CheckExact(mp) && mp->ma_values == NULL && - (mp->ma_used >= (mp->ma_keys->dk_nentries * 2) / 3)) - { - /* Use fast-copy if: - - (1) 'mp' is an instance of a subclassed dict; and - - (2) 'mp' is not a split-dict; and - - (3) if 'mp' is non-compact ('del' operation does not resize dicts), - do fast-copy only if it has at most 1/3 non-used keys. - - The last condition (3) is important to guard against a pathological - case when a large dict is almost emptied with multiple del/pop - operations and copied after that. In cases like this, we defer to - PyDict_Merge, which produces a compacted copy. - */ - return clone_combined_dict(mp); - } - copy = PyDict_New(); if (copy == NULL) return NULL; - if (PyDict_Merge(copy, o, 1) == 0) + if (dict_merge(copy, o, 1) == 0) return copy; Py_DECREF(copy); return NULL;