Skip to content

Commit d9323a8

Browse files
authored
bpo-41493: Refactoring dictresize (GH-21751)
Split the newsize calculation into a new function. dictresize() now accepts the exact newsize.
1 parent 5f0769a commit d9323a8

File tree

1 file changed

+41
-26
lines changed

1 file changed

+41
-26
lines changed

Objects/dictobject.c

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ converting the dict to the combined table.
111111
#define PyDict_MINSIZE 8
112112

113113
#include "Python.h"
114+
#include "pycore_bitutils.h" // _Py_bit_length
114115
#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
115116
#include "pycore_object.h" // _PyObject_GC_TRACK()
116117
#include "pycore_pyerrors.h" // _PyErr_Fetch()
@@ -236,7 +237,7 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
236237
static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
237238
Py_hash_t hash, PyObject **value_addr);
238239

239-
static int dictresize(PyDictObject *mp, Py_ssize_t minused);
240+
static int dictresize(PyDictObject *mp, Py_ssize_t newsize);
240241

241242
static PyObject* dict_iter(PyDictObject *dict);
242243

@@ -411,18 +412,40 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
411412
*/
412413
#define USABLE_FRACTION(n) (((n) << 1)/3)
413414

414-
/* ESTIMATE_SIZE is reverse function of USABLE_FRACTION.
415+
/* Find the smallest dk_size >= minsize. */
416+
static inline Py_ssize_t
417+
calculate_keysize(Py_ssize_t minsize)
418+
{
419+
#if SIZEOF_LONG == SIZEOF_SIZE_T
420+
minsize = (minsize | PyDict_MINSIZE) - 1;
421+
return 1LL << _Py_bit_length(minsize | (PyDict_MINSIZE-1));
422+
#elif defined(_MSC_VER)
423+
// On 64bit Windows, sizeof(long) == 4.
424+
minsize = (minsize | PyDict_MINSIZE) - 1;
425+
unsigned long msb;
426+
_BitScanReverse64(&msb, (uint64_t)minsize);
427+
return 1LL << (msb + 1);
428+
#else
429+
Py_ssize_t size;
430+
for (size = PyDict_MINSIZE;
431+
size < minsize && size > 0;
432+
size <<= 1)
433+
;
434+
return size;
435+
#endif
436+
}
437+
438+
/* estimate_keysize is the reverse function of USABLE_FRACTION.
439+
*
415440
* This can be used to reserve enough size to insert n entries without
416441
* resizing.
417442
*/
418-
#define ESTIMATE_SIZE(n) (((n)*3+1) >> 1)
443+
static inline Py_ssize_t
444+
estimate_keysize(Py_ssize_t n)
445+
{
446+
return calculate_keysize((n*3 + 1) / 2);
447+
}
419448

420-
/* Alternative fraction that is otherwise close enough to 2n/3 to make
421-
* little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
422-
* 32 * 2/3 = 21, 32 * 5/8 = 20.
423-
* Its advantage is that it is faster to compute on machines with slow division.
424-
* #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
425-
*/
426449

427450
/* GROWTH_RATE. Growth rate upon hitting maximum load.
428451
* Currently set to used*3.
@@ -1036,7 +1059,7 @@ find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)
10361059
static int
10371060
insertion_resize(PyDictObject *mp)
10381061
{
1039-
return dictresize(mp, GROWTH_RATE(mp));
1062+
return dictresize(mp, calculate_keysize(GROWTH_RATE(mp)));
10401063
}
10411064

10421065
/*
@@ -1194,22 +1217,19 @@ After resizing a table is always combined,
11941217
but can be resplit by make_keys_shared().
11951218
*/
11961219
static int
1197-
dictresize(PyDictObject *mp, Py_ssize_t minsize)
1220+
dictresize(PyDictObject *mp, Py_ssize_t newsize)
11981221
{
1199-
Py_ssize_t newsize, numentries;
1222+
Py_ssize_t numentries;
12001223
PyDictKeysObject *oldkeys;
12011224
PyObject **oldvalues;
12021225
PyDictKeyEntry *oldentries, *newentries;
12031226

1204-
/* Find the smallest table size > minused. */
1205-
for (newsize = PyDict_MINSIZE;
1206-
newsize < minsize && newsize > 0;
1207-
newsize <<= 1)
1208-
;
12091227
if (newsize <= 0) {
12101228
PyErr_NoMemory();
12111229
return -1;
12121230
}
1231+
assert(IS_POWER_OF_2(newsize));
1232+
assert(newsize >= PyDict_MINSIZE);
12131233

12141234
oldkeys = mp->ma_keys;
12151235

@@ -1355,13 +1375,8 @@ _PyDict_NewPresized(Py_ssize_t minused)
13551375
newsize = max_presize;
13561376
}
13571377
else {
1358-
Py_ssize_t minsize = ESTIMATE_SIZE(minused);
1359-
newsize = PyDict_MINSIZE*2;
1360-
while (newsize < minsize) {
1361-
newsize <<= 1;
1362-
}
1378+
newsize = estimate_keysize(minused);
13631379
}
1364-
assert(IS_POWER_OF_2(newsize));
13651380

13661381
new_keys = new_keys_object(newsize);
13671382
if (new_keys == NULL)
@@ -1930,7 +1945,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
19301945
PyObject *key;
19311946
Py_hash_t hash;
19321947

1933-
if (dictresize(mp, ESTIMATE_SIZE(PyDict_GET_SIZE(iterable)))) {
1948+
if (dictresize(mp, estimate_keysize(PyDict_GET_SIZE(iterable)))) {
19341949
Py_DECREF(d);
19351950
return NULL;
19361951
}
@@ -1949,7 +1964,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
19491964
PyObject *key;
19501965
Py_hash_t hash;
19511966

1952-
if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) {
1967+
if (dictresize(mp, estimate_keysize(PySet_GET_SIZE(iterable)))) {
19531968
Py_DECREF(d);
19541969
return NULL;
19551970
}
@@ -2558,7 +2573,7 @@ dict_merge(PyObject *a, PyObject *b, int override)
25582573
* that there will be no (or few) overlapping keys.
25592574
*/
25602575
if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
2561-
if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) {
2576+
if (dictresize(mp, estimate_keysize(mp->ma_used + other->ma_used))) {
25622577
return -1;
25632578
}
25642579
}

0 commit comments

Comments
 (0)