Skip to content

bpo-41493: Refactoring dictresize #21751

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 7, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 41 additions & 26 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ converting the dict to the combined table.
#define PyDict_MINSIZE 8

#include "Python.h"
#include "pycore_bitutils.h" // _Py_bit_length
#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pyerrors.h" // _PyErr_Fetch()
Expand Down Expand Up @@ -236,7 +237,7 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr);

static int dictresize(PyDictObject *mp, Py_ssize_t minused);
static int dictresize(PyDictObject *mp, Py_ssize_t newsize);

static PyObject* dict_iter(PyDictObject *dict);

Expand Down Expand Up @@ -411,18 +412,40 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
*/
#define USABLE_FRACTION(n) (((n) << 1)/3)

/* ESTIMATE_SIZE is reverse function of USABLE_FRACTION.
/* Find the smallest dk_size >= minsize. */
static inline Py_ssize_t
calculate_keysize(Py_ssize_t minsize)
{
#if SIZEOF_LONG == SIZEOF_SIZE_T
minsize = (minsize | PyDict_MINSIZE) - 1;
return 1LL << _Py_bit_length(minsize | (PyDict_MINSIZE-1));
#elif defined(_MSC_VER)
// On 64bit Windows, sizeof(long) == 4.
minsize = (minsize | PyDict_MINSIZE) - 1;
unsigned long msb;
_BitScanReverse64(&msb, (uint64_t)minsize);
return 1LL << (msb + 1);
#else
Py_ssize_t size;
for (size = PyDict_MINSIZE;
size < minsize && size > 0;
size <<= 1)
;
return size;
#endif
}

/* estimate_keysize is reverse function of USABLE_FRACTION.
*
* This can be used to reserve enough size to insert n entries without
* resizing.
*/
#define ESTIMATE_SIZE(n) (((n)*3+1) >> 1)
static inline Py_ssize_t
estimate_keysize(Py_ssize_t n)
{
return calculate_keysize((n*3 + 1) / 2);
}

/* Alternative fraction that is otherwise close enough to 2n/3 to make
* little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
* 32 * 2/3 = 21, 32 * 5/8 = 20.
* Its advantage is that it is faster to compute on machines with slow division.
* #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
*/

/* GROWTH_RATE. Growth rate upon hitting maximum load.
* Currently set to used*3.
Expand Down Expand Up @@ -1036,7 +1059,7 @@ find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)
static int
insertion_resize(PyDictObject *mp)
{
return dictresize(mp, GROWTH_RATE(mp));
return dictresize(mp, calculate_keysize(GROWTH_RATE(mp)));
}

/*
Expand Down Expand Up @@ -1194,22 +1217,19 @@ After resizing a table is always combined,
but can be resplit by make_keys_shared().
*/
static int
dictresize(PyDictObject *mp, Py_ssize_t minsize)
dictresize(PyDictObject *mp, Py_ssize_t newsize)
{
Py_ssize_t newsize, numentries;
Py_ssize_t numentries;
PyDictKeysObject *oldkeys;
PyObject **oldvalues;
PyDictKeyEntry *oldentries, *newentries;

/* Find the smallest table size > minused. */
for (newsize = PyDict_MINSIZE;
newsize < minsize && newsize > 0;
newsize <<= 1)
;
if (newsize <= 0) {
PyErr_NoMemory();
return -1;
}
assert(IS_POWER_OF_2(newsize));
assert(newsize >= PyDict_MINSIZE);

oldkeys = mp->ma_keys;

Expand Down Expand Up @@ -1355,13 +1375,8 @@ _PyDict_NewPresized(Py_ssize_t minused)
newsize = max_presize;
}
else {
Py_ssize_t minsize = ESTIMATE_SIZE(minused);
newsize = PyDict_MINSIZE*2;
while (newsize < minsize) {
newsize <<= 1;
}
newsize = estimate_keysize(minused);
}
assert(IS_POWER_OF_2(newsize));

new_keys = new_keys_object(newsize);
if (new_keys == NULL)
Expand Down Expand Up @@ -1930,7 +1945,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key;
Py_hash_t hash;

if (dictresize(mp, ESTIMATE_SIZE(PyDict_GET_SIZE(iterable)))) {
if (dictresize(mp, estimate_keysize(PyDict_GET_SIZE(iterable)))) {
Py_DECREF(d);
return NULL;
}
Expand All @@ -1949,7 +1964,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key;
Py_hash_t hash;

if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) {
if (dictresize(mp, estimate_keysize(PySet_GET_SIZE(iterable)))) {
Py_DECREF(d);
return NULL;
}
Expand Down Expand Up @@ -2558,7 +2573,7 @@ dict_merge(PyObject *a, PyObject *b, int override)
* that there will be no (or few) overlapping keys.
*/
if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) {
if (dictresize(mp, estimate_keysize(mp->ma_used + other->ma_used))) {
return -1;
}
}
Expand Down