From 1fc114930ec782ca7058259b80594d2aa8613c01 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 29 Apr 2024 18:22:05 -0600 Subject: [PATCH 01/23] Store the cached def in a wrapping struct rather than directly. --- Python/import.c | 61 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/Python/import.c b/Python/import.c index f120a3841b495d..7e8622ca7e61b5 100644 --- a/Python/import.c +++ b/Python/import.c @@ -940,6 +940,11 @@ extensions_lock_release(void) dictionary, to avoid loading shared libraries twice. */ +struct extensions_cache_value { + PyModuleDef *def; +// PyModuleDef _def; +}; + static void * hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) { @@ -984,6 +989,18 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } +static void +hashtable_destroy_value(void *value) +{ + if (value != NULL) { + struct extensions_cache_value *v + = (struct extensions_cache_value *)value; + /* There's no need to decref the def since it's immortal. */ + assert(v->def == NULL || _Py_IsImmortal(v->def)); + PyMem_RawFree(value); + } +} + #define HTSEP ':' static int @@ -994,8 +1011,7 @@ _extensions_cache_init(void) hashtable_hash_str, hashtable_compare_str, hashtable_destroy_str, // key - /* There's no need to decref the def since it's immortal. */ - NULL, // value + hashtable_destroy_value, // value &alloc ); if (EXTENSIONS.hashtable == NULL) { @@ -1027,10 +1043,10 @@ _extensions_cache_find_unlocked(PyObject *path, PyObject *name, return entry; } -static PyModuleDef * +static struct extensions_cache_value * _extensions_cache_get(PyObject *path, PyObject *name) { - PyModuleDef *def = NULL; + struct extensions_cache_value *value = NULL; extensions_lock_acquire(); _Py_hashtable_entry_t *entry = @@ -1039,11 +1055,11 @@ _extensions_cache_get(PyObject *path, PyObject *name) /* It was never added. */ goto finally; } - def = (PyModuleDef *)entry->value; + value = (struct extensions_cache_value *)entry->value; finally: extensions_lock_release(); - return def; + return value; } static int @@ -1051,6 +1067,15 @@ _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) { int res = -1; assert(def != NULL); + + struct extensions_cache_value *value + = PyMem_RawMalloc(sizeof(struct extensions_cache_value)); + if (value == NULL) { + PyErr_NoMemory(); + return -1; + } + value->def = def; + extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { @@ -1065,7 +1090,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) _extensions_cache_find_unlocked(path, name, &key); if (entry == NULL) { /* It was never added. */ - if (_Py_hashtable_set(EXTENSIONS.hashtable, key, def) < 0) { + if (_Py_hashtable_set(EXTENSIONS.hashtable, key, value) < 0) { PyErr_NoMemory(); goto finally; } @@ -1074,11 +1099,11 @@ _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) } else if (entry->value == NULL) { /* It was previously deleted. */ - entry->value = def; + entry->value = value; } else { /* We expect it to be static, so it must be the same pointer. */ - assert((PyModuleDef *)entry->value == def); + assert(((struct extensions_cache_value *)entry->value)->def == def); /* It was already added. */ already_set = 1; } @@ -1122,7 +1147,7 @@ _extensions_cache_delete(PyObject *path, PyObject *name) However, this decref would be problematic if the module def were dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. */ - assert(_Py_IsImmortal(entry->value)); + hashtable_destroy_value(entry->value); entry->value = NULL; finally: @@ -1338,8 +1363,9 @@ update_global_state_for_extension(PyThreadState *tstate, // XXX Why special-case the main interpreter? if (_Py_IsMainInterpreter(tstate->interp) || def->m_size == -1) { #ifndef NDEBUG - PyModuleDef *cached = _extensions_cache_get(path, name); - assert(cached == NULL || cached == def); + struct extensions_cache_value *cached + = _extensions_cache_get(path, name); + assert(cached == NULL || cached->def == def); #endif if (_extensions_cache_set(path, name, def) < 0) { return -1; @@ -1471,10 +1497,12 @@ import_find_extension(PyThreadState *tstate, struct _Py_ext_module_loader_info *info) { /* Only single-phase init modules will be in the cache. */ - PyModuleDef *def = _extensions_cache_get(info->path, info->name); - if (def == NULL) { + struct extensions_cache_value *value + = _extensions_cache_get(info->path, info->name); + if (value == NULL) { return NULL; } + PyModuleDef *def = value->def; assert_singlephase(def); /* It may have been successfully imported previously @@ -1594,13 +1622,14 @@ static int clear_singlephase_extension(PyInterpreterState *interp, PyObject *name, PyObject *path) { - PyModuleDef *def = _extensions_cache_get(path, name); - if (def == NULL) { + struct extensions_cache_value *value = _extensions_cache_get(path, name); + if (value == NULL) { if (PyErr_Occurred()) { return -1; } return 0; } + PyModuleDef *def = value->def; /* Clear data set when the module was initially loaded. */ def->m_base.m_init = NULL; From 254e1f1eece9b6765fccb6012b111fabc59fe7c3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 18:15:36 -0600 Subject: [PATCH 02/23] Pass the cached value around where appropriate. --- Python/import.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/Python/import.c b/Python/import.c index 7e8622ca7e61b5..ebeaacaf3052fc 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1400,9 +1400,12 @@ finish_singlephase_extension(PyThreadState *tstate, static PyObject * -reload_singlephase_extension(PyThreadState *tstate, PyModuleDef *def, +reload_singlephase_extension(PyThreadState *tstate, + struct extensions_cache_value *cached, struct _Py_ext_module_loader_info *info) { + PyModuleDef *def = cached->def; + assert(def != NULL); assert_singlephase(def); PyObject *mod = NULL; @@ -1497,13 +1500,12 @@ import_find_extension(PyThreadState *tstate, struct _Py_ext_module_loader_info *info) { /* Only single-phase init modules will be in the cache. */ - struct extensions_cache_value *value + struct extensions_cache_value *cached = _extensions_cache_get(info->path, info->name); - if (value == NULL) { + if (cached == NULL) { return NULL; } - PyModuleDef *def = value->def; - assert_singlephase(def); + assert_singlephase(cached->def); /* It may have been successfully imported previously in an interpreter that allows legacy modules @@ -1514,7 +1516,7 @@ import_find_extension(PyThreadState *tstate, return NULL; } - PyObject *mod = reload_singlephase_extension(tstate, def, info); + PyObject *mod = reload_singlephase_extension(tstate, cached, info); if (mod == NULL) { return NULL; } @@ -1622,14 +1624,14 @@ static int clear_singlephase_extension(PyInterpreterState *interp, PyObject *name, PyObject *path) { - struct extensions_cache_value *value = _extensions_cache_get(path, name); - if (value == NULL) { + struct extensions_cache_value *cached = _extensions_cache_get(path, name); + if (cached == NULL) { if (PyErr_Occurred()) { return -1; } return 0; } - PyModuleDef *def = value->def; + PyModuleDef *def = cached->def; /* Clear data set when the module was initially loaded. */ def->m_base.m_init = NULL; @@ -1732,7 +1734,12 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) PyObject *mod = import_find_extension(tstate, &info); if (mod != NULL) { assert(!_PyErr_Occurred(tstate)); - assert_singlephase(_PyModule_GetDef(mod)); +#ifndef NDEBUG + struct extensions_cache_value *cached + = _extensions_cache_get(info.path, info.name); +#endif + assert(cached->def == _PyModule_GetDef(mod)); + assert_singlephase(cached->def); goto finally; } else if (_PyErr_Occurred(tstate)) { @@ -4086,7 +4093,13 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) mod = import_find_extension(tstate, &info); if (mod != NULL) { assert(!_PyErr_Occurred(tstate)); - assert_singlephase(_PyModule_GetDef(mod)); +#ifndef NDEBUG + struct extensions_cache_value *cached + = _extensions_cache_get(info.path, info.name); +#endif + assert(_PyModule_GetDef(mod) == NULL + || cached->def == _PyModule_GetDef(mod)); + assert_singlephase(cached->def); goto finally; } else if (_PyErr_Occurred(tstate)) { From 06943187adbb0203c0f8dd24829e092a93164914 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 13:16:17 -0600 Subject: [PATCH 03/23] Pass the cached value to check_singlephase(). --- Python/import.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/Python/import.c b/Python/import.c index ebeaacaf3052fc..fefb9c9743e668 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1277,13 +1277,16 @@ _get_extension_kind(PyModuleDef *def, bool check_size) || kind == _Py_ext_module_kind_UNKNOWN); \ } while (0) -#define assert_singlephase(def) \ - assert_singlephase_def(def) +#define assert_singlephase(cached) \ + do { \ + _Py_ext_module_kind kind = _get_extension_kind(cached->def, true); \ + assert(kind == _Py_ext_module_kind_SINGLEPHASE); \ + } while (0) #else /* defined(NDEBUG) */ #define assert_multiphase_def(def) #define assert_singlephase_def(def) -#define assert_singlephase(def) +#define assert_singlephase(cached) #endif @@ -1406,7 +1409,7 @@ reload_singlephase_extension(PyThreadState *tstate, { PyModuleDef *def = cached->def; assert(def != NULL); - assert_singlephase(def); + assert_singlephase(cached); PyObject *mod = NULL; /* It may have been successfully imported previously @@ -1505,7 +1508,8 @@ import_find_extension(PyThreadState *tstate, if (cached == NULL) { return NULL; } - assert_singlephase(cached->def); + assert(cached->def != NULL); + assert_singlephase(cached); /* It may have been successfully imported previously in an interpreter that allows legacy modules @@ -1736,10 +1740,10 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) assert(!_PyErr_Occurred(tstate)); #ifndef NDEBUG struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); + = _extensions_cache_get(info.path, info.name); #endif assert(cached->def == _PyModule_GetDef(mod)); - assert_singlephase(cached->def); + assert_singlephase(cached); goto finally; } else if (_PyErr_Occurred(tstate)) { @@ -4095,11 +4099,11 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) assert(!_PyErr_Occurred(tstate)); #ifndef NDEBUG struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); + = _extensions_cache_get(info.path, info.name); #endif assert(_PyModule_GetDef(mod) == NULL || cached->def == _PyModule_GetDef(mod)); - assert_singlephase(cached->def); + assert_singlephase(cached); goto finally; } else if (_PyErr_Occurred(tstate)) { From 9ca330bacf0690305b970b3399edf68d553905c8 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 Apr 2024 16:45:48 -0600 Subject: [PATCH 04/23] Add extensions_cache_value helpers. --- Python/import.c | 136 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 40 deletions(-) diff --git a/Python/import.c b/Python/import.c index fefb9c9743e668..f6aa438b8889c0 100644 --- a/Python/import.c +++ b/Python/import.c @@ -924,6 +924,7 @@ extensions_lock_release(void) PyMutex_Unlock(&_PyRuntime.imports.extensions.mutex); } + /* Magic for extension modules (built-in as well as dynamically loaded). To prevent initializing an extension module more than once, we keep a static dictionary 'extensions' keyed by the tuple @@ -940,11 +941,69 @@ extensions_lock_release(void) dictionary, to avoid loading shared libraries twice. */ +typedef PyDictObject *cached_m_dict_t; + struct extensions_cache_value { PyModuleDef *def; -// PyModuleDef _def; }; +static struct extensions_cache_value * +alloc_extensions_cache_value(void) +{ + struct extensions_cache_value *value + = PyMem_RawMalloc(sizeof(struct extensions_cache_value)); + if (value == NULL) { + PyErr_NoMemory(); + return NULL; + } + *value = (struct extensions_cache_value){0}; + return value; +} + +static void +free_extensions_cache_value(struct extensions_cache_value *value) +{ + PyMem_RawFree(value); +} + +static int +update_extensions_cache_value(struct extensions_cache_value *value, + PyModuleDef *def) +{ + assert(def != NULL); + /* We expect it to be static, so it must be the same pointer. */ + assert(value->def == NULL || value->def == def); + + /* We assume that all module defs are statically allocated + and will never be freed. Otherwise, we would incref here. */ + _Py_SetImmortalUntracked((PyObject *)def); + + *value = (struct extensions_cache_value){ + .def=def, + }; + return 0; +} + +static void +clear_extensions_cache_value(struct extensions_cache_value *value) +{ + assert(value != NULL); + /* If we hadn't made the stored defs immortal, we would decref here. + However, this decref would be problematic if the module def were + dynamically allocated, it were the last ref, and this function + were called with an interpreter other than the def's owner. */ + assert(value->def == NULL || _Py_IsImmortal(value->def)); +} + +static void +del_extensions_cache_value(struct extensions_cache_value *value) +{ + if (value != NULL) { + clear_extensions_cache_value(value); + free_extensions_cache_value(value); + } +} + static void * hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) { @@ -958,6 +1017,7 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) assert(SIZE_MAX - str1_len - str2_len > 2); size_t size = str1_len + 1 + str2_len + 1; + // XXX Use a buffer if it's a temp value (every case but "set"). char *key = PyMem_RawMalloc(size); if (key == NULL) { PyErr_NoMemory(); @@ -989,18 +1049,6 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } -static void -hashtable_destroy_value(void *value) -{ - if (value != NULL) { - struct extensions_cache_value *v - = (struct extensions_cache_value *)value; - /* There's no need to decref the def since it's immortal. */ - assert(v->def == NULL || _Py_IsImmortal(v->def)); - PyMem_RawFree(value); - } -} - #define HTSEP ':' static int @@ -1011,7 +1059,7 @@ _extensions_cache_init(void) hashtable_hash_str, hashtable_compare_str, hashtable_destroy_str, // key - hashtable_destroy_value, // value + (_Py_hashtable_destroy_func)del_extensions_cache_value, // value &alloc ); if (EXTENSIONS.hashtable == NULL) { @@ -1043,6 +1091,7 @@ _extensions_cache_find_unlocked(PyObject *path, PyObject *name, return entry; } +/* This can only fail with "out of memory". */ static struct extensions_cache_value * _extensions_cache_get(PyObject *path, PyObject *name) { @@ -1062,19 +1111,20 @@ _extensions_cache_get(PyObject *path, PyObject *name) return value; } +/* This can only fail with "out of memory". */ static int _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) { int res = -1; assert(def != NULL); + void *key = NULL; + struct extensions_cache_value *value = NULL; + struct extensions_cache_value *newvalue = NULL; - struct extensions_cache_value *value - = PyMem_RawMalloc(sizeof(struct extensions_cache_value)); - if (value == NULL) { - PyErr_NoMemory(); + struct extensions_cache_value updates = {0}; + if (update_extensions_cache_value(&updates, def) < 0) { return -1; } - value->def = def; extensions_lock_acquire(); @@ -1084,35 +1134,35 @@ _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) } } - int already_set = 0; - void *key = NULL; _Py_hashtable_entry_t *entry = _extensions_cache_find_unlocked(path, name, &key); if (entry == NULL) { /* It was never added. */ - if (_Py_hashtable_set(EXTENSIONS.hashtable, key, value) < 0) { + newvalue = alloc_extensions_cache_value(); + if (newvalue == NULL) { + goto finally; + } + if (_Py_hashtable_set(EXTENSIONS.hashtable, key, newvalue) < 0) { PyErr_NoMemory(); goto finally; } + value = newvalue; /* The hashtable owns the key now. */ key = NULL; } - else if (entry->value == NULL) { - /* It was previously deleted. */ - entry->value = value; - } else { - /* We expect it to be static, so it must be the same pointer. */ - assert(((struct extensions_cache_value *)entry->value)->def == def); - /* It was already added. */ - already_set = 1; + value = (struct extensions_cache_value *)entry->value; + if (value == NULL) { + /* It was previously deleted. */ + newvalue = alloc_extensions_cache_value(); + if (newvalue == NULL) { + goto finally; + } + value = newvalue; + entry->value = value; + } } - if (!already_set) { - /* We assume that all module defs are statically allocated - and will never be freed. Otherwise, we would incref here. */ - _Py_SetImmortal((PyObject *)def); - } res = 0; finally: @@ -1120,6 +1170,14 @@ _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) if (key != NULL) { hashtable_destroy_str(key); } + + if (value != NULL) { + *value = updates; + } + else if (newvalue != NULL) { + del_extensions_cache_value(newvalue); + } + return res; } @@ -1143,13 +1201,11 @@ _extensions_cache_delete(PyObject *path, PyObject *name) /* It was already removed. */ goto finally; } - /* If we hadn't made the stored defs immortal, we would decref here. - However, this decref would be problematic if the module def were - dynamically allocated, it were the last ref, and this function - were called with an interpreter other than the def's owner. */ - hashtable_destroy_value(entry->value); + struct extensions_cache_value *value = entry->value; entry->value = NULL; + del_extensions_cache_value(value); + finally: extensions_lock_release(); } From 7e0c4ee8d1791958c8d04297d36be8dfb2bea68c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 Apr 2024 17:00:29 -0600 Subject: [PATCH 05/23] Add get_cached_m_dict(). --- Python/import.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/Python/import.c b/Python/import.c index f6aa438b8889c0..7e381907fef928 100644 --- a/Python/import.c +++ b/Python/import.c @@ -966,6 +966,15 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } +static PyObject * +get_cached_m_dict(struct extensions_cache_value *value) +{ + assert(value != NULL); + assert(value->def != NULL); + Py_XINCREF(value->def->m_base.m_copy); + return value->def->m_base.m_copy; +} + static int update_extensions_cache_value(struct extensions_cache_value *value, PyModuleDef *def) @@ -1261,11 +1270,11 @@ get_core_module_dict(PyInterpreterState *interp, if (path == name) { assert(!PyErr_Occurred()); if (PyUnicode_CompareWithASCIIString(name, "sys") == 0) { - return interp->sysdict_copy; + return Py_NewRef(interp->sysdict_copy); } assert(!PyErr_Occurred()); if (PyUnicode_CompareWithASCIIString(name, "builtins") == 0) { - return interp->builtins_copy; + return Py_NewRef(interp->builtins_copy); } assert(!PyErr_Occurred()); } @@ -1466,6 +1475,7 @@ reload_singlephase_extension(PyThreadState *tstate, PyModuleDef *def = cached->def; assert(def != NULL); assert_singlephase(cached); + PyModInitFunction m_init = def->m_base.m_init; PyObject *mod = NULL; /* It may have been successfully imported previously @@ -1479,8 +1489,13 @@ reload_singlephase_extension(PyThreadState *tstate, PyObject *modules = get_modules_dict(tstate, true); if (def->m_size == -1) { - PyObject *m_copy = def->m_base.m_copy; /* Module does not support repeated initialization */ + assert(m_init == NULL); + assert(def->m_base.m_init == NULL); + // XXX Copying the cached dict may break interpreter isolation. + // We could solve this by temporarily acquiring the original + // interpreter's GIL. + PyObject *m_copy = get_cached_m_dict(cached); if (m_copy == NULL) { /* It might be a core module (e.g. sys & builtins), for which we don't set m_copy. */ @@ -1493,14 +1508,18 @@ reload_singlephase_extension(PyThreadState *tstate, } mod = import_add_module(tstate, info->name); if (mod == NULL) { + Py_DECREF(m_copy); return NULL; } PyObject *mdict = PyModule_GetDict(mod); if (mdict == NULL) { + Py_DECREF(m_copy); Py_DECREF(mod); return NULL; } - if (PyDict_Update(mdict, m_copy)) { + int rc = PyDict_Update(mdict, m_copy); + Py_DECREF(m_copy); + if (rc < 0) { Py_DECREF(mod); return NULL; } From c1c1379c6811fbd7f57b4a821df2252b5fb23cfb Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 11:30:35 -0600 Subject: [PATCH 06/23] Track ext module origin. --- Include/internal/pycore_importdl.h | 12 +++++++++++- Python/import.c | 23 ++++++++++++++++++----- Python/importdl.c | 24 +++++++++++++++++++++--- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_importdl.h b/Include/internal/pycore_importdl.h index b0af28da34e087..e5f222b371a113 100644 --- a/Include/internal/pycore_importdl.h +++ b/Include/internal/pycore_importdl.h @@ -22,6 +22,11 @@ typedef enum ext_module_kind { _Py_ext_module_kind_INVALID = 3, } _Py_ext_module_kind; +typedef enum ext_module_origin { + _Py_ext_module_origin_CORE = 1, + _Py_ext_module_origin_BUILTIN = 2, + _Py_ext_module_origin_DYNAMIC = 3, +} _Py_ext_module_origin; /* Input for loading an extension module. */ struct _Py_ext_module_loader_info { @@ -34,6 +39,7 @@ struct _Py_ext_module_loader_info { /* path is always a borrowed ref of name or filename, * depending on if it's builtin or not. */ PyObject *path; + _Py_ext_module_origin origin; const char *hook_prefix; const char *newcontext; }; @@ -42,7 +48,11 @@ extern void _Py_ext_module_loader_info_clear( extern int _Py_ext_module_loader_info_init( struct _Py_ext_module_loader_info *info, PyObject *name, - PyObject *filename); + PyObject *filename, + _Py_ext_module_origin origin); +extern int _Py_ext_module_loader_info_init_for_core( + struct _Py_ext_module_loader_info *p_info, + PyObject *name); extern int _Py_ext_module_loader_info_init_for_builtin( struct _Py_ext_module_loader_info *p_info, PyObject *name); diff --git a/Python/import.c b/Python/import.c index 7e381907fef928..9dddc9259dd3f4 100644 --- a/Python/import.c +++ b/Python/import.c @@ -945,6 +945,8 @@ typedef PyDictObject *cached_m_dict_t; struct extensions_cache_value { PyModuleDef *def; + + _Py_ext_module_origin origin; }; static struct extensions_cache_value * @@ -977,7 +979,7 @@ get_cached_m_dict(struct extensions_cache_value *value) static int update_extensions_cache_value(struct extensions_cache_value *value, - PyModuleDef *def) + PyModuleDef *def, _Py_ext_module_origin origin) { assert(def != NULL); /* We expect it to be static, so it must be the same pointer. */ @@ -989,6 +991,7 @@ update_extensions_cache_value(struct extensions_cache_value *value, *value = (struct extensions_cache_value){ .def=def, + .origin=origin, }; return 0; } @@ -1122,16 +1125,18 @@ _extensions_cache_get(PyObject *path, PyObject *name) /* This can only fail with "out of memory". */ static int -_extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) +_extensions_cache_set(PyObject *path, PyObject *name, + PyModuleDef *def,_Py_ext_module_origin origin) { int res = -1; assert(def != NULL); + assert((origin == _Py_ext_module_origin_DYNAMIC) == (name != path)); void *key = NULL; struct extensions_cache_value *value = NULL; struct extensions_cache_value *newvalue = NULL; struct extensions_cache_value updates = {0}; - if (update_extensions_cache_value(&updates, def) < 0) { + if (update_extensions_cache_value(&updates, def, origin) < 0) { return -1; } @@ -1358,6 +1363,7 @@ _get_extension_kind(PyModuleDef *def, bool check_size) struct singlephase_global_update { PyObject *m_dict; PyModInitFunction m_init; + _Py_ext_module_origin origin; }; static int @@ -1435,7 +1441,11 @@ update_global_state_for_extension(PyThreadState *tstate, = _extensions_cache_get(path, name); assert(cached == NULL || cached->def == def); #endif - if (_extensions_cache_set(path, name, def) < 0) { + if (_extensions_cache_set( + path, name, def, singlephase->origin) < 0) + { + // XXX Ignore this error? Doing so would effectively + // mark the module as not loadable. return -1; } } @@ -1660,7 +1670,9 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, } /* Update global import state. */ - struct singlephase_global_update singlephase = {0}; + struct singlephase_global_update singlephase = { + .origin=info->origin, + }; // gh-88216: Extensions and def->m_base.m_copy can be updated // when the extension module doesn't support sub-interpreters. if (def->m_size == -1) { @@ -1766,6 +1778,7 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, struct singlephase_global_update singlephase = { /* We don't want def->m_base.m_copy populated. */ .m_dict=NULL, + .origin=_Py_ext_module_origin_CORE, }; if (update_global_state_for_extension( tstate, nameobj, nameobj, def, &singlephase) < 0) diff --git a/Python/importdl.c b/Python/importdl.c index a3091946bc94bf..38f56db4b4cc96 100644 --- a/Python/importdl.c +++ b/Python/importdl.c @@ -111,9 +111,12 @@ _Py_ext_module_loader_info_clear(struct _Py_ext_module_loader_info *info) int _Py_ext_module_loader_info_init(struct _Py_ext_module_loader_info *p_info, - PyObject *name, PyObject *filename) + PyObject *name, PyObject *filename, + _Py_ext_module_origin origin) { - struct _Py_ext_module_loader_info info = {0}; + struct _Py_ext_module_loader_info info = { + .origin=origin, + }; assert(name != NULL); if (!PyUnicode_Check(name)) { @@ -183,12 +186,25 @@ _Py_ext_module_loader_info_init_for_builtin( .name_encoded=name_encoded, /* We won't need filename. */ .path=name, + .origin=_Py_ext_module_origin_BUILTIN, .hook_prefix=ascii_only_prefix, .newcontext=NULL, }; return 0; } +int +_Py_ext_module_loader_info_init_for_core( + struct _Py_ext_module_loader_info *info, + PyObject *name) +{ + if (_Py_ext_module_loader_info_init_for_builtin(info, name) < 0) { + return -1; + } + info->origin = _Py_ext_module_origin_CORE; + return 0; +} + int _Py_ext_module_loader_info_init_from_spec( struct _Py_ext_module_loader_info *p_info, @@ -203,7 +219,9 @@ _Py_ext_module_loader_info_init_from_spec( Py_DECREF(name); return -1; } - int err = _Py_ext_module_loader_info_init(p_info, name, filename); + /* We could also accommodate builtin modules here without much trouble. */ + _Py_ext_module_origin origin = _Py_ext_module_origin_DYNAMIC; + int err = _Py_ext_module_loader_info_init(p_info, name, filename, origin); Py_DECREF(name); Py_DECREF(filename); return err; From 8485fb5c24f7cc1b89b2b45e4536d1fab4081466 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 11:35:51 -0600 Subject: [PATCH 07/23] Look up core module dicts in get_cached_m_dict(). --- Python/import.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/Python/import.c b/Python/import.c index 9dddc9259dd3f4..e22b2eab94d0ce 100644 --- a/Python/import.c +++ b/Python/import.c @@ -968,13 +968,24 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } +static PyObject * get_core_module_dict( + PyInterpreterState *interp, PyObject *name, PyObject *path); + static PyObject * -get_cached_m_dict(struct extensions_cache_value *value) +get_cached_m_dict(struct extensions_cache_value *value, + PyObject *name, PyObject *path) { assert(value != NULL); + PyInterpreterState *interp = _PyInterpreterState_GET(); + /* It might be a core module (e.g. sys & builtins), + for which we don't cache m_dict. */ + if (value->origin == _Py_ext_module_origin_CORE) { + return get_core_module_dict(interp, name, path); + } assert(value->def != NULL); - Py_XINCREF(value->def->m_base.m_copy); - return value->def->m_base.m_copy; + PyObject *m_dict = value->def->m_base.m_copy; + Py_XINCREF(m_dict); + return m_dict; } static int @@ -1505,16 +1516,10 @@ reload_singlephase_extension(PyThreadState *tstate, // XXX Copying the cached dict may break interpreter isolation. // We could solve this by temporarily acquiring the original // interpreter's GIL. - PyObject *m_copy = get_cached_m_dict(cached); + PyObject *m_copy = get_cached_m_dict(cached, info->name, info->path); if (m_copy == NULL) { - /* It might be a core module (e.g. sys & builtins), - for which we don't set m_copy. */ - m_copy = get_core_module_dict( - tstate->interp, info->name, info->path); - if (m_copy == NULL) { - assert(!PyErr_Occurred()); - return NULL; - } + assert(!PyErr_Occurred()); + return NULL; } mod = import_add_module(tstate, info->name); if (mod == NULL) { From abbddde74a363267409368a143487431572c122c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 Apr 2024 18:50:08 -0600 Subject: [PATCH 08/23] Pass m_dict to _extensions_cache_set(). --- Python/import.c | 127 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 89 insertions(+), 38 deletions(-) diff --git a/Python/import.c b/Python/import.c index e22b2eab94d0ce..5529402ca4ae60 100644 --- a/Python/import.c +++ b/Python/import.c @@ -968,6 +968,56 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } +static int +set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) +{ + assert(value != NULL); + assert(value->def != NULL); + + PyObject *copied = NULL; + if (m_dict != NULL) { + assert(PyDict_Check(m_dict)); + copied = PyDict_Copy(m_dict); + if (copied == NULL) { + /* We expect this can only be "out of memory". */ + return -1; + } + } + + /* XXX gh-88216: The copied dict is owned by the current + * interpreter. That's a problem if the interpreter has + * its own obmalloc state or if the module is successfully + * imported into such an interpreter. If the interpreter + * has its own GIL then there may be data races and + * PyImport_ClearModulesByIndex() can crash. Normally, + * a single-phase init module cannot be imported in an + * isolated interpreter, but there are ways around that. + * Hence, heere be dragons! Ideally we would instead do + * something like make a read-only, immortal copy of the + * dict using PyMem_RawMalloc() and store *that* in m_copy. + * Then we'd need to make sure to clear that when the + * runtime is finalized, rather than in + * PyImport_ClearModulesByIndex(). */ + PyObject *old = value->def->m_base.m_copy; + if (old != NULL) { + // XXX This should really never happen. + Py_DECREF(old); + } + + value->def->m_base.m_copy = copied; + return 0; +} + +static void +del_cached_m_dict(struct extensions_cache_value *value) +{ + assert(value != NULL); + if (value->def != NULL) { + Py_XDECREF((PyObject *)value->def->m_base.m_copy); + value->def->m_base.m_copy = NULL; + } +} + static PyObject * get_core_module_dict( PyInterpreterState *interp, PyObject *name, PyObject *path); @@ -990,23 +1040,44 @@ get_cached_m_dict(struct extensions_cache_value *value, static int update_extensions_cache_value(struct extensions_cache_value *value, - PyModuleDef *def, _Py_ext_module_origin origin) + PyModuleDef *def, PyObject *m_dict, + _Py_ext_module_origin origin) { assert(def != NULL); /* We expect it to be static, so it must be the same pointer. */ assert(value->def == NULL || value->def == def); + assert(def->m_base.m_init == NULL || m_dict == NULL); + /* For now we don't worry about comparing value->m_copy. */ + assert(def->m_base.m_copy == NULL || m_dict != NULL); /* We assume that all module defs are statically allocated and will never be freed. Otherwise, we would incref here. */ _Py_SetImmortalUntracked((PyObject *)def); - *value = (struct extensions_cache_value){ + struct extensions_cache_value temp = { .def=def, .origin=origin, }; + if (set_cached_m_dict(&temp, m_dict) < 0) { + return -1; + } + + *value = temp; return 0; } +static void +copy_extensions_cache_value(struct extensions_cache_value *value, + struct extensions_cache_value *updates) +{ + if (value->def != NULL + && value->def->m_base.m_copy != updates->def->m_base.m_copy) + { + del_cached_m_dict(value); + } + *value = *updates; +} + static void clear_extensions_cache_value(struct extensions_cache_value *value) { @@ -1016,6 +1087,7 @@ clear_extensions_cache_value(struct extensions_cache_value *value) dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. */ assert(value->def == NULL || _Py_IsImmortal(value->def)); + del_cached_m_dict(value); } static void @@ -1137,7 +1209,8 @@ _extensions_cache_get(PyObject *path, PyObject *name) /* This can only fail with "out of memory". */ static int _extensions_cache_set(PyObject *path, PyObject *name, - PyModuleDef *def,_Py_ext_module_origin origin) + PyModuleDef *def, PyObject *m_dict, + _Py_ext_module_origin origin) { int res = -1; assert(def != NULL); @@ -1147,7 +1220,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, struct extensions_cache_value *newvalue = NULL; struct extensions_cache_value updates = {0}; - if (update_extensions_cache_value(&updates, def, origin) < 0) { + if (update_extensions_cache_value(&updates, def, m_dict, origin) < 0) { return -1; } @@ -1191,18 +1264,18 @@ _extensions_cache_set(PyObject *path, PyObject *name, res = 0; finally: - extensions_lock_release(); - if (key != NULL) { - hashtable_destroy_str(key); - } - if (value != NULL) { - *value = updates; + copy_extensions_cache_value(value, &updates); } else if (newvalue != NULL) { del_extensions_cache_value(newvalue); } + extensions_lock_release(); + if (key != NULL) { + hashtable_destroy_str(key); + } + return res; } @@ -1383,6 +1456,8 @@ update_global_state_for_extension(PyThreadState *tstate, PyModuleDef *def, struct singlephase_global_update *singlephase) { + PyObject *m_dict = NULL; + /* Copy the module's __dict__, if applicable. */ if (singlephase == NULL) { assert(def->m_base.m_copy == NULL); @@ -1408,39 +1483,15 @@ update_global_state_for_extension(PyThreadState *tstate, assert(def->m_base.m_init == NULL); } else { - assert(PyDict_Check(singlephase->m_dict)); + assert(singlephase->m_dict != NULL + && PyDict_Check(singlephase->m_dict)); // gh-88216: Extensions and def->m_base.m_copy can be updated // when the extension module doesn't support sub-interpreters. assert(def->m_size == -1); assert(!is_core_module(tstate->interp, name, path)); assert(PyUnicode_CompareWithASCIIString(name, "sys") != 0); assert(PyUnicode_CompareWithASCIIString(name, "builtins") != 0); - /* XXX gh-88216: The copied dict is owned by the current - * interpreter. That's a problem if the interpreter has - * its own obmalloc state or if the module is successfully - * imported into such an interpreter. If the interpreter - * has its own GIL then there may be data races and - * PyImport_ClearModulesByIndex() can crash. Normally, - * a single-phase init module cannot be imported in an - * isolated interpreter, but there are ways around that. - * Hence, heere be dragons! Ideally we would instead do - * something like make a read-only, immortal copy of the - * dict using PyMem_RawMalloc() and store *that* in m_copy. - * Then we'd need to make sure to clear that when the - * runtime is finalized, rather than in - * PyImport_ClearModulesByIndex(). */ - if (def->m_base.m_copy) { - /* Somebody already imported the module, - likely under a different name. - XXX this should really not happen. */ - Py_CLEAR(def->m_base.m_copy); - } - def->m_base.m_copy = PyDict_Copy(singlephase->m_dict); - if (def->m_base.m_copy == NULL) { - // XXX Ignore this error? Doing so would effectively - // mark the module as not loadable. */ - return -1; - } + m_dict = singlephase->m_dict; } } @@ -1453,7 +1504,7 @@ update_global_state_for_extension(PyThreadState *tstate, assert(cached == NULL || cached->def == def); #endif if (_extensions_cache_set( - path, name, def, singlephase->origin) < 0) + path, name, def, m_dict, singlephase->origin) < 0) { // XXX Ignore this error? Doing so would effectively // mark the module as not loadable. From 12bcd96c79ff60969e0cc64a51cc083d8aaa7bda Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 Apr 2024 19:07:24 -0600 Subject: [PATCH 09/23] Pass m_init to _extensions_cache_set(). --- Python/import.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/Python/import.c b/Python/import.c index 5529402ca4ae60..a49433f275b624 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1040,13 +1040,16 @@ get_cached_m_dict(struct extensions_cache_value *value, static int update_extensions_cache_value(struct extensions_cache_value *value, - PyModuleDef *def, PyObject *m_dict, - _Py_ext_module_origin origin) + PyModuleDef *def, PyModInitFunction m_init, + PyObject *m_dict, _Py_ext_module_origin origin) { assert(def != NULL); /* We expect it to be static, so it must be the same pointer. */ assert(value->def == NULL || value->def == def); - assert(def->m_base.m_init == NULL || m_dict == NULL); + assert(m_init == NULL || m_dict == NULL); + /* We expect the same symbol to be used and the shared object file + * to have remained loaded, so it must be the same pointer. */ + assert(def->m_base.m_init == NULL || def->m_base.m_init == m_init); /* For now we don't worry about comparing value->m_copy. */ assert(def->m_base.m_copy == NULL || m_dict != NULL); @@ -1058,6 +1061,7 @@ update_extensions_cache_value(struct extensions_cache_value *value, .def=def, .origin=origin, }; + def->m_base.m_init = m_init; if (set_cached_m_dict(&temp, m_dict) < 0) { return -1; } @@ -1209,8 +1213,8 @@ _extensions_cache_get(PyObject *path, PyObject *name) /* This can only fail with "out of memory". */ static int _extensions_cache_set(PyObject *path, PyObject *name, - PyModuleDef *def, PyObject *m_dict, - _Py_ext_module_origin origin) + PyModuleDef *def, PyModInitFunction m_init, + PyObject *m_dict, _Py_ext_module_origin origin) { int res = -1; assert(def != NULL); @@ -1220,7 +1224,9 @@ _extensions_cache_set(PyObject *path, PyObject *name, struct extensions_cache_value *newvalue = NULL; struct extensions_cache_value updates = {0}; - if (update_extensions_cache_value(&updates, def, m_dict, origin) < 0) { + if (update_extensions_cache_value( + &updates, def, m_init, m_dict, origin) < 0) + { return -1; } @@ -1456,10 +1462,12 @@ update_global_state_for_extension(PyThreadState *tstate, PyModuleDef *def, struct singlephase_global_update *singlephase) { + PyModInitFunction m_init = NULL; PyObject *m_dict = NULL; /* Copy the module's __dict__, if applicable. */ if (singlephase == NULL) { + assert(def->m_base.m_init == NULL); assert(def->m_base.m_copy == NULL); } else { @@ -1473,7 +1481,7 @@ update_global_state_for_extension(PyThreadState *tstate, // We should prevent this somehow. The simplest solution // is probably to store m_copy/m_init in the cache along // with the def, rather than within the def. - def->m_base.m_init = singlephase->m_init; + m_init = singlephase->m_init; } else if (singlephase->m_dict == NULL) { /* It must be a core builtin module. */ @@ -1504,7 +1512,7 @@ update_global_state_for_extension(PyThreadState *tstate, assert(cached == NULL || cached->def == def); #endif if (_extensions_cache_set( - path, name, def, m_dict, singlephase->origin) < 0) + path, name, def, m_init, m_dict, singlephase->origin) < 0) { // XXX Ignore this error? Doing so would effectively // mark the module as not loadable. @@ -1547,7 +1555,6 @@ reload_singlephase_extension(PyThreadState *tstate, PyModuleDef *def = cached->def; assert(def != NULL); assert_singlephase(cached); - PyModInitFunction m_init = def->m_base.m_init; PyObject *mod = NULL; /* It may have been successfully imported previously @@ -1562,7 +1569,6 @@ reload_singlephase_extension(PyThreadState *tstate, PyObject *modules = get_modules_dict(tstate, true); if (def->m_size == -1) { /* Module does not support repeated initialization */ - assert(m_init == NULL); assert(def->m_base.m_init == NULL); // XXX Copying the cached dict may break interpreter isolation. // We could solve this by temporarily acquiring the original @@ -1598,12 +1604,14 @@ reload_singlephase_extension(PyThreadState *tstate, || _PyModule_GetDef(mod) == def); } else { - if (def->m_base.m_init == NULL) { + assert(def->m_base.m_copy == NULL); + PyModInitFunction p0 = def->m_base.m_init; + if (p0 == NULL) { assert(!PyErr_Occurred()); return NULL; } struct _Py_ext_module_loader_result res; - if (_PyImport_RunModInitFunc(def->m_base.m_init, info, &res) < 0) { + if (_PyImport_RunModInitFunc(p0, info, &res) < 0) { _Py_ext_module_loader_result_apply_error(&res, name_buf); return NULL; } From 2d56adaa7d05572f9e92c0e287052fcbdfb31a9d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 20:13:25 -0600 Subject: [PATCH 10/23] Add some comments. --- Python/import.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/import.c b/Python/import.c index a49433f275b624..f630acd8b35f04 100644 --- a/Python/import.c +++ b/Python/import.c @@ -587,6 +587,8 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) /* cleanup the saved copy of module dicts */ PyModuleDef *md = PyModule_GetDef(m); if (md) { + // XXX Do this more carefully. The dict might be owned + // by another interpreter. Py_CLEAR(md->m_base.m_copy); } } @@ -982,6 +984,7 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) /* We expect this can only be "out of memory". */ return -1; } + // XXX We may want to make copied immortal. } /* XXX gh-88216: The copied dict is owned by the current From 9c70d5317ada2bbc54b3fd47ca88c2ee1af59752 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 20:23:32 -0600 Subject: [PATCH 11/23] Use del_cached_m_dict() in set_cached_m_dict(). --- Python/import.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Python/import.c b/Python/import.c index f630acd8b35f04..088b02bbd52fa2 100644 --- a/Python/import.c +++ b/Python/import.c @@ -970,6 +970,8 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } +static void del_cached_m_dict(struct extensions_cache_value *); + static int set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) { @@ -1001,11 +1003,9 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) * Then we'd need to make sure to clear that when the * runtime is finalized, rather than in * PyImport_ClearModulesByIndex(). */ - PyObject *old = value->def->m_base.m_copy; - if (old != NULL) { - // XXX This should really never happen. - Py_DECREF(old); - } + /* Ideally, the cached dict would always be NULL at this point. */ + assert(value->def->m_base.m_copy == NULL || copied != NULL); + del_cached_m_dict(value); value->def->m_base.m_copy = copied; return 0; @@ -1016,7 +1016,7 @@ del_cached_m_dict(struct extensions_cache_value *value) { assert(value != NULL); if (value->def != NULL) { - Py_XDECREF((PyObject *)value->def->m_base.m_copy); + Py_XDECREF(value->def->m_base.m_copy); value->def->m_base.m_copy = NULL; } } @@ -1069,6 +1069,7 @@ update_extensions_cache_value(struct extensions_cache_value *value, return -1; } + del_cached_m_dict(value); *value = temp; return 0; } From 0098057614166c1dd1a3326a3f5b2ef0720e27b0 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 Apr 2024 19:32:59 -0600 Subject: [PATCH 12/23] Add m_init and m_dict fields to extensions_cache_value. --- Python/import.c | 86 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 27 deletions(-) diff --git a/Python/import.c b/Python/import.c index 088b02bbd52fa2..a61a7ed3c02fd2 100644 --- a/Python/import.c +++ b/Python/import.c @@ -948,6 +948,18 @@ typedef PyDictObject *cached_m_dict_t; struct extensions_cache_value { PyModuleDef *def; + /* The function used to re-initialize the module. + This is only set for legacy (single-phase init) extension modules + and only used for those that support multiple initializations + (m_size >= 0). + It is set by update_global_state_for_extension(). */ + PyModInitFunction m_init; + /* A copy of the module's __dict__ after the first time it was loaded. + This is only set/used for legacy modules that do not support + multiple initializations. + It is set by update_global_state_for_extension(). */ + cached_m_dict_t m_dict; + _Py_ext_module_origin origin; }; @@ -970,8 +982,6 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } -static void del_cached_m_dict(struct extensions_cache_value *); - static int set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) { @@ -1005,20 +1015,27 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) * PyImport_ClearModulesByIndex(). */ /* Ideally, the cached dict would always be NULL at this point. */ assert(value->def->m_base.m_copy == NULL || copied != NULL); - del_cached_m_dict(value); + assert(value->m_dict == NULL || copied != NULL); + Py_XDECREF(value->def->m_base.m_copy); + Py_XDECREF((PyObject *)value->m_dict); - value->def->m_base.m_copy = copied; + value->def->m_base.m_copy = Py_XNewRef(copied); + value->m_dict = (cached_m_dict_t)copied; return 0; } static void -del_cached_m_dict(struct extensions_cache_value *value) +del_cached_m_dict(struct extensions_cache_value *value, + struct extensions_cache_value *compare) { - assert(value != NULL); - if (value->def != NULL) { + if (compare == NULL && value->def != NULL) { Py_XDECREF(value->def->m_base.m_copy); value->def->m_base.m_copy = NULL; } + if (compare == NULL || compare->m_dict != value->m_dict) { + Py_XDECREF((PyObject *)value->m_dict); + value->m_dict = NULL; + } } static PyObject * get_core_module_dict( @@ -1036,6 +1053,7 @@ get_cached_m_dict(struct extensions_cache_value *value, return get_core_module_dict(interp, name, path); } assert(value->def != NULL); + // XXX Switch to value->m_dict. PyObject *m_dict = value->def->m_base.m_copy; Py_XINCREF(m_dict); return m_dict; @@ -1052,9 +1070,10 @@ update_extensions_cache_value(struct extensions_cache_value *value, assert(m_init == NULL || m_dict == NULL); /* We expect the same symbol to be used and the shared object file * to have remained loaded, so it must be the same pointer. */ - assert(def->m_base.m_init == NULL || def->m_base.m_init == m_init); + assert(value->m_init == NULL || value->m_init == m_init); /* For now we don't worry about comparing value->m_copy. */ assert(def->m_base.m_copy == NULL || m_dict != NULL); + assert(value->m_dict == NULL || m_dict != NULL); /* We assume that all module defs are statically allocated and will never be freed. Otherwise, we would incref here. */ @@ -1062,6 +1081,7 @@ update_extensions_cache_value(struct extensions_cache_value *value, struct extensions_cache_value temp = { .def=def, + .m_init=m_init, .origin=origin, }; def->m_base.m_init = m_init; @@ -1069,7 +1089,14 @@ update_extensions_cache_value(struct extensions_cache_value *value, return -1; } - del_cached_m_dict(value); + if (m_init != NULL) { + assert(temp.m_dict == NULL); + } + else { + assert(temp.m_init == NULL); + } + + del_cached_m_dict(value, NULL); *value = temp; return 0; } @@ -1078,11 +1105,11 @@ static void copy_extensions_cache_value(struct extensions_cache_value *value, struct extensions_cache_value *updates) { - if (value->def != NULL - && value->def->m_base.m_copy != updates->def->m_base.m_copy) - { - del_cached_m_dict(value); - } + assert(updates != value); + assert(value->def == NULL || updates->def == value->def); + // XXX Skipping this (or an extra incref in set_cached_m_dict() + // should not be necessary. + //del_cached_m_dict(value, updates); *value = *updates; } @@ -1095,7 +1122,7 @@ clear_extensions_cache_value(struct extensions_cache_value *value) dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. */ assert(value->def == NULL || _Py_IsImmortal(value->def)); - del_cached_m_dict(value); + del_cached_m_dict(value, NULL); } static void @@ -1238,7 +1265,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, if (EXTENSIONS.hashtable == NULL) { if (_extensions_cache_init() < 0) { - goto finally; + goto error; } } @@ -1248,11 +1275,11 @@ _extensions_cache_set(PyObject *path, PyObject *name, /* It was never added. */ newvalue = alloc_extensions_cache_value(); if (newvalue == NULL) { - goto finally; + goto error; } if (_Py_hashtable_set(EXTENSIONS.hashtable, key, newvalue) < 0) { PyErr_NoMemory(); - goto finally; + goto error; } value = newvalue; /* The hashtable owns the key now. */ @@ -1264,23 +1291,24 @@ _extensions_cache_set(PyObject *path, PyObject *name, /* It was previously deleted. */ newvalue = alloc_extensions_cache_value(); if (newvalue == NULL) { - goto finally; + goto error; } value = newvalue; entry->value = value; } } + copy_extensions_cache_value(value, &updates); res = 0; + goto finally; -finally: - if (value != NULL) { - copy_extensions_cache_value(value, &updates); - } - else if (newvalue != NULL) { +error: + if (newvalue != NULL) { del_extensions_cache_value(newvalue); } + clear_extensions_cache_value(&updates); +finally: extensions_lock_release(); if (key != NULL) { hashtable_destroy_str(key); @@ -1396,6 +1424,7 @@ is_core_module(PyInterpreterState *interp, PyObject *name, PyObject *path) return 0; } + #ifndef NDEBUG static _Py_ext_module_kind _get_extension_kind(PyModuleDef *def, bool check_size) @@ -1469,7 +1498,7 @@ update_global_state_for_extension(PyThreadState *tstate, PyModInitFunction m_init = NULL; PyObject *m_dict = NULL; - /* Copy the module's __dict__, if applicable. */ + /* Set up for _extensions_cache_set(). */ if (singlephase == NULL) { assert(def->m_base.m_init == NULL); assert(def->m_base.m_copy == NULL); @@ -1495,8 +1524,7 @@ update_global_state_for_extension(PyThreadState *tstate, assert(def->m_base.m_init == NULL); } else { - assert(singlephase->m_dict != NULL - && PyDict_Check(singlephase->m_dict)); + assert(PyDict_Check(singlephase->m_dict)); // gh-88216: Extensions and def->m_base.m_copy can be updated // when the extension module doesn't support sub-interpreters. assert(def->m_size == -1); @@ -1573,6 +1601,7 @@ reload_singlephase_extension(PyThreadState *tstate, PyObject *modules = get_modules_dict(tstate, true); if (def->m_size == -1) { /* Module does not support repeated initialization */ + assert(cached->m_init == NULL); assert(def->m_base.m_init == NULL); // XXX Copying the cached dict may break interpreter isolation. // We could solve this by temporarily acquiring the original @@ -1608,7 +1637,9 @@ reload_singlephase_extension(PyThreadState *tstate, || _PyModule_GetDef(mod) == def); } else { + assert(cached->m_dict == NULL); assert(def->m_base.m_copy == NULL); + // XXX Use cached->m_init. PyModInitFunction p0 = def->m_base.m_init; if (p0 == NULL) { assert(!PyErr_Occurred()); @@ -1752,6 +1783,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, else { /* We will reload via the init function. */ assert(def->m_size >= 0); + assert(def->m_base.m_copy == NULL); singlephase.m_init = p0; } if (update_global_state_for_extension( From 57aaba259c00f8fa95ea83f55a40f49301c4782a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 11:43:31 -0600 Subject: [PATCH 13/23] Track which interpreter was used to create m_dict. --- Python/import.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Python/import.c b/Python/import.c index a61a7ed3c02fd2..4d61776811771b 100644 --- a/Python/import.c +++ b/Python/import.c @@ -959,6 +959,7 @@ struct extensions_cache_value { multiple initializations. It is set by update_global_state_for_extension(). */ cached_m_dict_t m_dict; + PyInterpreterState *m_dict_interp; _Py_ext_module_origin origin; }; @@ -988,8 +989,11 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) assert(value != NULL); assert(value->def != NULL); + PyInterpreterState *interp = NULL; PyObject *copied = NULL; if (m_dict != NULL) { + interp = _PyInterpreterState_GET(); + assert(value->origin != _Py_ext_module_origin_CORE); assert(PyDict_Check(m_dict)); copied = PyDict_Copy(m_dict); if (copied == NULL) { @@ -1021,6 +1025,7 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) value->def->m_base.m_copy = Py_XNewRef(copied); value->m_dict = (cached_m_dict_t)copied; + value->m_dict_interp = interp; return 0; } @@ -1074,6 +1079,8 @@ update_extensions_cache_value(struct extensions_cache_value *value, /* For now we don't worry about comparing value->m_copy. */ assert(def->m_base.m_copy == NULL || m_dict != NULL); assert(value->m_dict == NULL || m_dict != NULL); + assert((value->m_dict == NULL) == (value->m_dict_interp == NULL)); + assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); /* We assume that all module defs are statically allocated and will never be freed. Otherwise, we would incref here. */ From 96932eb8601897a8283535de5cb46fcae3f54c5d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 12:12:26 -0600 Subject: [PATCH 14/23] Track the interpreter ID. --- Python/import.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Python/import.c b/Python/import.c index 4d61776811771b..14e6657266b9b9 100644 --- a/Python/import.c +++ b/Python/import.c @@ -959,10 +959,12 @@ struct extensions_cache_value { multiple initializations. It is set by update_global_state_for_extension(). */ cached_m_dict_t m_dict; - PyInterpreterState *m_dict_interp; + int64_t m_dict_interpid; _Py_ext_module_origin origin; }; +#define EXTENSIONS_CACHE_VALUE_INIT \ + (struct extensions_cache_value){ .m_dict_interpid=-1 } static struct extensions_cache_value * alloc_extensions_cache_value(void) @@ -973,7 +975,7 @@ alloc_extensions_cache_value(void) PyErr_NoMemory(); return NULL; } - *value = (struct extensions_cache_value){0}; + *value = EXTENSIONS_CACHE_VALUE_INIT; return value; } @@ -989,10 +991,9 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) assert(value != NULL); assert(value->def != NULL); - PyInterpreterState *interp = NULL; + int64_t interpid = -1; PyObject *copied = NULL; if (m_dict != NULL) { - interp = _PyInterpreterState_GET(); assert(value->origin != _Py_ext_module_origin_CORE); assert(PyDict_Check(m_dict)); copied = PyDict_Copy(m_dict); @@ -1001,6 +1002,11 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) return -1; } // XXX We may want to make copied immortal. + + value->def->m_base.m_cache_has_m_dict = 1; + + PyInterpreterState *interp = _PyInterpreterState_GET(); + interpid = PyInterpreterState_GetID(interp); } /* XXX gh-88216: The copied dict is owned by the current @@ -1025,7 +1031,7 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) value->def->m_base.m_copy = Py_XNewRef(copied); value->m_dict = (cached_m_dict_t)copied; - value->m_dict_interp = interp; + value->m_dict_interpid = interpid; return 0; } @@ -1079,7 +1085,7 @@ update_extensions_cache_value(struct extensions_cache_value *value, /* For now we don't worry about comparing value->m_copy. */ assert(def->m_base.m_copy == NULL || m_dict != NULL); assert(value->m_dict == NULL || m_dict != NULL); - assert((value->m_dict == NULL) == (value->m_dict_interp == NULL)); + assert((value->m_dict == NULL) == (value->m_dict_interpid < 0)); assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); /* We assume that all module defs are statically allocated @@ -1261,7 +1267,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, struct extensions_cache_value *value = NULL; struct extensions_cache_value *newvalue = NULL; - struct extensions_cache_value updates = {0}; + struct extensions_cache_value updates = EXTENSIONS_CACHE_VALUE_INIT; if (update_extensions_cache_value( &updates, def, m_init, m_dict, origin) < 0) { From a4cb134f828b6075fb943dcbf7a20df6370707b3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 11:35:51 -0600 Subject: [PATCH 15/23] Look up core module dicts in get_cached_m_dict(). --- Python/import.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/import.c b/Python/import.c index 14e6657266b9b9..621a3e3d5492f7 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1001,9 +1001,8 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) /* We expect this can only be "out of memory". */ return -1; } - // XXX We may want to make copied immortal. - value->def->m_base.m_cache_has_m_dict = 1; + // XXX We may want to make copied immortal. PyInterpreterState *interp = _PyInterpreterState_GET(); interpid = PyInterpreterState_GetID(interp); From c4d3be19d23ec788c1a708912fa74a1664500c44 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 12:12:56 -0600 Subject: [PATCH 16/23] Make sure we only set m_dict for non-isolated interpreters. --- Python/import.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Python/import.c b/Python/import.c index 621a3e3d5492f7..36d86f056f5571 100644 --- a/Python/import.c +++ b/Python/import.c @@ -34,6 +34,17 @@ module _imp #include "clinic/import.c.h" +#ifndef NDEBUG +static bool +is_interpreter_isolated(PyInterpreterState *interp) +{ + return !_Py_IsMainInterpreter(interp) + && !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) + && interp->ceval.own_gil; +} +#endif + + /*******************************/ /* process-global import state */ /*******************************/ @@ -1006,6 +1017,7 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) PyInterpreterState *interp = _PyInterpreterState_GET(); interpid = PyInterpreterState_GetID(interp); + assert(!is_interpreter_isolated(interp)); } /* XXX gh-88216: The copied dict is owned by the current From d87584d44f65b9aeeb98b4113945913026ef429d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 2 May 2024 18:16:42 -0600 Subject: [PATCH 17/23] Simplify. --- Python/import.c | 104 +++++++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 64 deletions(-) diff --git a/Python/import.c b/Python/import.c index 36d86f056f5571..4d2b5509f9c292 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1081,62 +1081,6 @@ get_cached_m_dict(struct extensions_cache_value *value, return m_dict; } -static int -update_extensions_cache_value(struct extensions_cache_value *value, - PyModuleDef *def, PyModInitFunction m_init, - PyObject *m_dict, _Py_ext_module_origin origin) -{ - assert(def != NULL); - /* We expect it to be static, so it must be the same pointer. */ - assert(value->def == NULL || value->def == def); - assert(m_init == NULL || m_dict == NULL); - /* We expect the same symbol to be used and the shared object file - * to have remained loaded, so it must be the same pointer. */ - assert(value->m_init == NULL || value->m_init == m_init); - /* For now we don't worry about comparing value->m_copy. */ - assert(def->m_base.m_copy == NULL || m_dict != NULL); - assert(value->m_dict == NULL || m_dict != NULL); - assert((value->m_dict == NULL) == (value->m_dict_interpid < 0)); - assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); - - /* We assume that all module defs are statically allocated - and will never be freed. Otherwise, we would incref here. */ - _Py_SetImmortalUntracked((PyObject *)def); - - struct extensions_cache_value temp = { - .def=def, - .m_init=m_init, - .origin=origin, - }; - def->m_base.m_init = m_init; - if (set_cached_m_dict(&temp, m_dict) < 0) { - return -1; - } - - if (m_init != NULL) { - assert(temp.m_dict == NULL); - } - else { - assert(temp.m_init == NULL); - } - - del_cached_m_dict(value, NULL); - *value = temp; - return 0; -} - -static void -copy_extensions_cache_value(struct extensions_cache_value *value, - struct extensions_cache_value *updates) -{ - assert(updates != value); - assert(value->def == NULL || updates->def == value->def); - // XXX Skipping this (or an extra incref in set_cached_m_dict() - // should not be necessary. - //del_cached_m_dict(value, updates); - *value = *updates; -} - static void clear_extensions_cache_value(struct extensions_cache_value *value) { @@ -1273,18 +1217,35 @@ _extensions_cache_set(PyObject *path, PyObject *name, { int res = -1; assert(def != NULL); + assert(m_init == NULL || m_dict == NULL); + /* We expect the same symbol to be used and the shared object file + * to have remained loaded, so it must be the same pointer. */ + assert(def->m_base.m_init == NULL || def->m_base.m_init == m_init); + /* For now we don't worry about comparing value->m_copy. */ + assert(def->m_base.m_copy == NULL || m_dict != NULL); assert((origin == _Py_ext_module_origin_DYNAMIC) == (name != path)); - void *key = NULL; - struct extensions_cache_value *value = NULL; - struct extensions_cache_value *newvalue = NULL; + assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); - struct extensions_cache_value updates = EXTENSIONS_CACHE_VALUE_INIT; - if (update_extensions_cache_value( - &updates, def, m_init, m_dict, origin) < 0) - { + /* We assume that all module defs are statically allocated + and will never be freed. Otherwise, we would incref here. */ + _Py_SetImmortalUntracked((PyObject *)def); + + /* Generate the new cache value data before taking the lock. */ + struct extensions_cache_value updates = { + .def=def, + .m_init=m_init, + .origin=origin, + }; + def->m_base.m_init = m_init; + if (set_cached_m_dict(&updates, m_dict) < 0) { return -1; } + /* Move on to the locked section. */ + void *key = NULL; + struct extensions_cache_value *value = NULL; + struct extensions_cache_value *newvalue = NULL; + extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { @@ -1293,6 +1254,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, } } + /* Ensure there's a cached value for the module. */ _Py_hashtable_entry_t *entry = _extensions_cache_find_unlocked(path, name, &key); if (entry == NULL) { @@ -1320,9 +1282,23 @@ _extensions_cache_set(PyObject *path, PyObject *name, value = newvalue; entry->value = value; } + else { + /* We expect it to be static, so it must be the same pointer. */ + assert(value->def == def); + /* We expect the same symbol to be used and the shared object file + * to have remained loaded, so it must be the same pointer. */ + assert(value->m_init == m_init); + assert(value->m_dict == NULL || m_dict != NULL); + assert((value->m_dict == NULL) == (value->m_dict_interpid < 0)); + } } - copy_extensions_cache_value(value, &updates); + /* Fill the cache value with the data we generated earlier. */ + // XXX Skipping this (or an extra incref in set_cached_m_dict() + // should not be necessary. + //del_cached_m_dict(value, updates); + *value = updates; + res = 0; goto finally; From 336d1da8c9f0469d90df2f93c8cf32a061ce32c4 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 3 May 2024 10:36:32 -0600 Subject: [PATCH 18/23] Fix a comment. --- Python/import.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/import.c b/Python/import.c index 4d2b5509f9c292..9151d9911d54bf 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1630,9 +1630,9 @@ reload_singlephase_extension(PyThreadState *tstate, } /* We can't set mod->md_def if it's missing, * because _PyImport_ClearModulesByIndex() might break - * due to violating interpreter isolation. See the note - * in fix_up_extension_for_interpreter(). Until that - * is solved, we leave md_def set to NULL. */ + * due to violating interpreter isolation. + * See the note in set_cached_m_dict(). + * Until that is solved, we leave md_def set to NULL. */ assert(_PyModule_GetDef(mod) == NULL || _PyModule_GetDef(mod) == def); } From bd1e134bad535db447dbf1647ed2b11f674b6341 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 3 May 2024 17:02:46 -0600 Subject: [PATCH 19/23] Separate the def logic from the rest of the extensions cache code. --- Python/import.c | 311 ++++++++++++++++++++++++++++++------------------ 1 file changed, 193 insertions(+), 118 deletions(-) diff --git a/Python/import.c b/Python/import.c index 9151d9911d54bf..c56e85da3465ad 100644 --- a/Python/import.c +++ b/Python/import.c @@ -954,7 +954,12 @@ extensions_lock_release(void) dictionary, to avoid loading shared libraries twice. */ -typedef PyDictObject *cached_m_dict_t; +typedef struct cached_m_dict { + /* A shallow copy of the original module's __dict__. */ + PyObject *copied; + /* The interpreter that owns the copy. */ + int64_t interpid; +} *cached_m_dict_t; struct extensions_cache_value { PyModuleDef *def; @@ -965,17 +970,16 @@ struct extensions_cache_value { (m_size >= 0). It is set by update_global_state_for_extension(). */ PyModInitFunction m_init; + /* A copy of the module's __dict__ after the first time it was loaded. This is only set/used for legacy modules that do not support multiple initializations. - It is set by update_global_state_for_extension(). */ + It is set exclusively by fixup_cached_def(). */ cached_m_dict_t m_dict; - int64_t m_dict_interpid; + struct cached_m_dict _m_dict; _Py_ext_module_origin origin; }; -#define EXTENSIONS_CACHE_VALUE_INIT \ - (struct extensions_cache_value){ .m_dict_interpid=-1 } static struct extensions_cache_value * alloc_extensions_cache_value(void) @@ -986,7 +990,7 @@ alloc_extensions_cache_value(void) PyErr_NoMemory(); return NULL; } - *value = EXTENSIONS_CACHE_VALUE_INIT; + *value = (struct extensions_cache_value){0}; return value; } @@ -996,29 +1000,78 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } -static int -set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) +static void +fixup_cached_def(struct extensions_cache_value *value) +{ + /* For the moment, the values in the def's m_base may belong + * to another module, and we're replacing them here. This can + * cause problems later if the old module is reloaded. + * + * Also, we don't decref any old cached values first when we + * replace them here, in case we need to restore them in the + * near future. Instead, the caller is responsible for wrapping + * this up by calling cleanup_old_cached_def() or + * restore_old_cached_def() if there was an error. */ + PyModuleDef *def = value->def; + assert(def != NULL); + + /* We assume that all module defs are statically allocated + and will never be freed. Otherwise, we would incref here. */ + _Py_SetImmortalUntracked((PyObject *)def); + + def->m_base.m_init = value->m_init; + + /* Different modules can share the same def, so we can't just + * expect m_copy to be NULL. */ + assert(def->m_base.m_copy == NULL + || def->m_base.m_init == NULL + || value->m_dict != NULL); + if (value->m_dict != NULL) { + assert(value->m_dict->copied != NULL); + /* As noted above, we don't first decref the old value, if any. */ + def->m_base.m_copy = Py_NewRef(value->m_dict->copied); + } +} + +static void +restore_old_cached_def(PyModuleDef *def, PyModuleDef_Base *oldbase) { - assert(value != NULL); - assert(value->def != NULL); + def->m_base = *oldbase; +} - int64_t interpid = -1; - PyObject *copied = NULL; - if (m_dict != NULL) { - assert(value->origin != _Py_ext_module_origin_CORE); - assert(PyDict_Check(m_dict)); - copied = PyDict_Copy(m_dict); - if (copied == NULL) { - /* We expect this can only be "out of memory". */ - return -1; - } +static void +cleanup_old_cached_def(PyModuleDef_Base *oldbase) +{ + Py_XDECREF(oldbase->m_copy); +} + +static void +del_cached_def(struct extensions_cache_value *value) +{ + /* If we hadn't made the stored defs immortal, we would decref here. + However, this decref would be problematic if the module def were + dynamically allocated, it were the last ref, and this function + were called with an interpreter other than the def's owner. */ + assert(value->def == NULL || _Py_IsImmortal(value->def)); - // XXX We may want to make copied immortal. + Py_XDECREF(value->def->m_base.m_copy); + value->def->m_base.m_copy = NULL; +} - PyInterpreterState *interp = _PyInterpreterState_GET(); - interpid = PyInterpreterState_GetID(interp); - assert(!is_interpreter_isolated(interp)); +static int +init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) +{ + assert(value != NULL); + /* This should only have been called without an m_dict already set. */ + assert(value->m_dict == NULL); + if (m_dict == NULL) { + return 0; } + assert(PyDict_Check(m_dict)); + assert(value->origin != _Py_ext_module_origin_CORE); + + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(!is_interpreter_isolated(interp)); /* XXX gh-88216: The copied dict is owned by the current * interpreter. That's a problem if the interpreter has @@ -1034,28 +1087,34 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) * Then we'd need to make sure to clear that when the * runtime is finalized, rather than in * PyImport_ClearModulesByIndex(). */ - /* Ideally, the cached dict would always be NULL at this point. */ - assert(value->def->m_base.m_copy == NULL || copied != NULL); - assert(value->m_dict == NULL || copied != NULL); - Py_XDECREF(value->def->m_base.m_copy); - Py_XDECREF((PyObject *)value->m_dict); + PyObject *copied = PyDict_Copy(m_dict); + if (copied == NULL) { + /* We expect this can only be "out of memory". */ + return -1; + } + // XXX We may want to make the copy immortal. + // XXX This incref shouldn't be necessary. We are decref'ing + // one to many times somewhere. + Py_INCREF(copied); - value->def->m_base.m_copy = Py_XNewRef(copied); - value->m_dict = (cached_m_dict_t)copied; - value->m_dict_interpid = interpid; + value->_m_dict = (struct cached_m_dict){ + .copied=copied, + .interpid=PyInterpreterState_GetID(interp), + }; + + value->m_dict = &value->_m_dict; return 0; } static void -del_cached_m_dict(struct extensions_cache_value *value, - struct extensions_cache_value *compare) -{ - if (compare == NULL && value->def != NULL) { - Py_XDECREF(value->def->m_base.m_copy); - value->def->m_base.m_copy = NULL; - } - if (compare == NULL || compare->m_dict != value->m_dict) { - Py_XDECREF((PyObject *)value->m_dict); +del_cached_m_dict(struct extensions_cache_value *value) +{ + if (value->m_dict != NULL) { + assert(value->m_dict == &value->_m_dict); + assert(value->m_dict->copied != NULL); + /* In the future we can take advantage of m_dict->interpid + * to decref the dict using the owning interpreter. */ + Py_XDECREF(value->m_dict->copied); value->m_dict = NULL; } } @@ -1081,23 +1140,12 @@ get_cached_m_dict(struct extensions_cache_value *value, return m_dict; } -static void -clear_extensions_cache_value(struct extensions_cache_value *value) -{ - assert(value != NULL); - /* If we hadn't made the stored defs immortal, we would decref here. - However, this decref would be problematic if the module def were - dynamically allocated, it were the last ref, and this function - were called with an interpreter other than the def's owner. */ - assert(value->def == NULL || _Py_IsImmortal(value->def)); - del_cached_m_dict(value, NULL); -} - static void del_extensions_cache_value(struct extensions_cache_value *value) { if (value != NULL) { - clear_extensions_cache_value(value); + del_cached_m_dict(value); + del_cached_def(value); free_extensions_cache_value(value); } } @@ -1226,89 +1274,88 @@ _extensions_cache_set(PyObject *path, PyObject *name, assert((origin == _Py_ext_module_origin_DYNAMIC) == (name != path)); assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); - /* We assume that all module defs are statically allocated - and will never be freed. Otherwise, we would incref here. */ - _Py_SetImmortalUntracked((PyObject *)def); - - /* Generate the new cache value data before taking the lock. */ - struct extensions_cache_value updates = { - .def=def, - .m_init=m_init, - .origin=origin, - }; - def->m_base.m_init = m_init; - if (set_cached_m_dict(&updates, m_dict) < 0) { - return -1; - } - - /* Move on to the locked section. */ void *key = NULL; struct extensions_cache_value *value = NULL; struct extensions_cache_value *newvalue = NULL; + PyModuleDef_Base olddefbase = def->m_base; extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { if (_extensions_cache_init() < 0) { - goto error; + goto finally; } } - /* Ensure there's a cached value for the module. */ + /* Create a cached value to populate for the module. */ _Py_hashtable_entry_t *entry = _extensions_cache_find_unlocked(path, name, &key); + value = entry == NULL + ? NULL + : (struct extensions_cache_value *)entry->value; + /* We should never be updating an existing cache value. */ + assert(value == NULL); + if (value != NULL) { + PyErr_Format(PyExc_SystemError, + "extension module %R is already cached", name); + goto finally; + } + newvalue = alloc_extensions_cache_value(); + if (newvalue == NULL) { + goto finally; + } + + /* Populate the new cache value data. */ + *newvalue = (struct extensions_cache_value){ + .def=def, + .m_init=m_init, + /* m_dict is set by set_cached_m_dict(). */ + .origin=origin, + }; + if (init_cached_m_dict(newvalue, m_dict) < 0) { + goto finally; + } + fixup_cached_def(newvalue); + if (entry == NULL) { /* It was never added. */ - newvalue = alloc_extensions_cache_value(); - if (newvalue == NULL) { - goto error; - } if (_Py_hashtable_set(EXTENSIONS.hashtable, key, newvalue) < 0) { PyErr_NoMemory(); - goto error; + goto finally; } - value = newvalue; /* The hashtable owns the key now. */ key = NULL; } + else if (value == NULL) { + /* It was previously deleted. */ + entry->value = newvalue; + } else { - value = (struct extensions_cache_value *)entry->value; - if (value == NULL) { - /* It was previously deleted. */ - newvalue = alloc_extensions_cache_value(); - if (newvalue == NULL) { - goto error; - } - value = newvalue; - entry->value = value; - } - else { - /* We expect it to be static, so it must be the same pointer. */ - assert(value->def == def); - /* We expect the same symbol to be used and the shared object file - * to have remained loaded, so it must be the same pointer. */ - assert(value->m_init == m_init); - assert(value->m_dict == NULL || m_dict != NULL); - assert((value->m_dict == NULL) == (value->m_dict_interpid < 0)); - } + /* We are updating the entry for an existing module. */ + /* We expect def to be static, so it must be the same pointer. */ + assert(value->def == def); + /* We expect the same symbol to be used and the shared object file + * to have remained loaded, so it must be the same pointer. */ + assert(value->m_init == m_init); + /* The same module can't switch between caching __dict__ and not. */ + assert((value->m_dict == NULL) == (m_dict == NULL)); + /* This shouldn't ever happen. */ + Py_UNREACHABLE(); } - /* Fill the cache value with the data we generated earlier. */ - // XXX Skipping this (or an extra incref in set_cached_m_dict() - // should not be necessary. - //del_cached_m_dict(value, updates); - *value = updates; - res = 0; - goto finally; -error: - if (newvalue != NULL) { - del_extensions_cache_value(newvalue); +finally: + if (res < 0) { + restore_old_cached_def(def, &olddefbase); + if (newvalue != NULL) { + del_extensions_cache_value(newvalue); + } + } + else { + cleanup_old_cached_def(&olddefbase); } - clear_extensions_cache_value(&updates); -finally: extensions_lock_release(); if (key != NULL) { hashtable_destroy_str(key); @@ -1875,15 +1922,21 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, assert(def->m_size == -1); assert(def->m_base.m_copy == NULL); - struct singlephase_global_update singlephase = { - /* We don't want def->m_base.m_copy populated. */ - .m_dict=NULL, - .origin=_Py_ext_module_origin_CORE, - }; - if (update_global_state_for_extension( - tstate, nameobj, nameobj, def, &singlephase) < 0) - { - goto finally; + /* We aren't using import_find_extension() for core modules, + * so we have to do the extra check to make sure the module + * isn't already in the global cache before calling + * update_global_state_for_extension(). */ + if (_extensions_cache_get(nameobj, nameobj) == NULL) { + struct singlephase_global_update singlephase = { + /* We don't want def->m_base.m_copy populated. */ + .m_dict=NULL, + .origin=_Py_ext_module_origin_CORE, + }; + if (update_global_state_for_extension( + tstate, nameobj, nameobj, def, &singlephase) < 0) + { + goto finally; + } } if (finish_singlephase_extension(tstate, mod, def, nameobj, modules) < 0) { @@ -1938,6 +1991,17 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) goto finally; } + /* If the module was added to the global cache + * but def->m_base.m_copy was cleared (e.g. subinterp fini) + * then we have to do a little dance here. */ + struct extensions_cache_value *cached + = _extensions_cache_get(info.path, info.name); + if (cached != NULL) { + assert(cached->def->m_base.m_copy == NULL); + /* For now we clear the cache and move on. */ + _extensions_cache_delete(info.path, info.name); + } + struct _inittab *found = NULL; for (struct _inittab *p = INITTAB; p->name != NULL; p++) { if (_PyUnicode_EqualToASCIIString(info.name, p->name)) { @@ -4299,6 +4363,17 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) } /* Otherwise it must be multi-phase init or the first time it's loaded. */ + /* If the module was added to the global cache + * but def->m_base.m_copy was cleared (e.g. subinterp fini) + * then we have to do a little dance here. */ + struct extensions_cache_value *cached + = _extensions_cache_get(info.path, info.name); + if (cached != NULL) { + assert(cached->def->m_base.m_copy == NULL); + /* For now we clear the cache and move on. */ + _extensions_cache_delete(info.path, info.name); + } + if (PySys_Audit("import", "OOOOO", info.name, info.filename, Py_None, Py_None, Py_None) < 0) { From e42e797a7daf572a0a8edd8d77230d6efca5290e Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 3 May 2024 17:15:04 -0600 Subject: [PATCH 20/23] Add the per-interpreter cache index to each global cache entry. --- Python/import.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/Python/import.c b/Python/import.c index c56e85da3465ad..c609f85924f4b0 100644 --- a/Python/import.c +++ b/Python/import.c @@ -971,6 +971,11 @@ struct extensions_cache_value { It is set by update_global_state_for_extension(). */ PyModInitFunction m_init; + /* The module's index into its interpreter's modules_by_index cache. + This is set for all extension modules but only used for legacy ones. + (See PyInterpreterState.modules_by_index for more info.) */ + Py_ssize_t m_index; + /* A copy of the module's __dict__ after the first time it was loaded. This is only set/used for legacy modules that do not support multiple initializations. @@ -1261,7 +1266,8 @@ _extensions_cache_get(PyObject *path, PyObject *name) static int _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def, PyModInitFunction m_init, - PyObject *m_dict, _Py_ext_module_origin origin) + Py_ssize_t m_index, PyObject *m_dict, + _Py_ext_module_origin origin) { int res = -1; assert(def != NULL); @@ -1309,6 +1315,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, *newvalue = (struct extensions_cache_value){ .def=def, .m_init=m_init, + .m_index=m_index, /* m_dict is set by set_cached_m_dict(). */ .origin=origin, }; @@ -1531,8 +1538,9 @@ _get_extension_kind(PyModuleDef *def, bool check_size) struct singlephase_global_update { - PyObject *m_dict; PyModInitFunction m_init; + Py_ssize_t m_index; + PyObject *m_dict; _Py_ext_module_origin origin; }; @@ -1591,7 +1599,8 @@ update_global_state_for_extension(PyThreadState *tstate, assert(cached == NULL || cached->def == def); #endif if (_extensions_cache_set( - path, name, def, m_init, m_dict, singlephase->origin) < 0) + path, name, def, m_init, singlephase->m_index, m_dict, + singlephase->origin) < 0) { // XXX Ignore this error? Doing so would effectively // mark the module as not loadable. @@ -1817,6 +1826,10 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, /* Update global import state. */ struct singlephase_global_update singlephase = { + // XXX Modules that share a def should each get their own index, + // whereas currently they share (which means the per-interpreter + // cache is less reliable than it should be). + .m_index=def->m_base.m_index, .origin=info->origin, }; // gh-88216: Extensions and def->m_base.m_copy can be updated @@ -1921,6 +1934,7 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, assert_singlephase_def(def); assert(def->m_size == -1); assert(def->m_base.m_copy == NULL); + assert(def->m_base.m_index >= 0); /* We aren't using import_find_extension() for core modules, * so we have to do the extra check to make sure the module @@ -1928,6 +1942,7 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, * update_global_state_for_extension(). */ if (_extensions_cache_get(nameobj, nameobj) == NULL) { struct singlephase_global_update singlephase = { + .m_index=def->m_base.m_index, /* We don't want def->m_base.m_copy populated. */ .m_dict=NULL, .origin=_Py_ext_module_origin_CORE, From faa425426260192fd337e077664868920b6dc788 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Sat, 4 May 2024 12:53:35 -0600 Subject: [PATCH 21/23] Fix an assert. --- Python/import.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/Python/import.c b/Python/import.c index c609f85924f4b0..a0920ba2d5d66c 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1740,7 +1740,8 @@ reload_singlephase_extension(PyThreadState *tstate, static PyObject * import_find_extension(PyThreadState *tstate, - struct _Py_ext_module_loader_info *info) + struct _Py_ext_module_loader_info *info, + struct extensions_cache_value **p_cached) { /* Only single-phase init modules will be in the cache. */ struct extensions_cache_value *cached @@ -1750,6 +1751,7 @@ import_find_extension(PyThreadState *tstate, } assert(cached->def != NULL); assert_singlephase(cached); + *p_cached = cached; /* It may have been successfully imported previously in an interpreter that allows legacy modules @@ -1770,6 +1772,7 @@ import_find_extension(PyThreadState *tstate, PySys_FormatStderr("import %U # previously loaded (%R)\n", info->name, info->path); } + return mod; } @@ -1991,14 +1994,14 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) return NULL; } - PyObject *mod = import_find_extension(tstate, &info); + struct extensions_cache_value *cached = NULL; + PyObject *mod = import_find_extension(tstate, &info, &cached); if (mod != NULL) { assert(!_PyErr_Occurred(tstate)); -#ifndef NDEBUG - struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); -#endif - assert(cached->def == _PyModule_GetDef(mod)); + assert(cached != NULL); + /* The module might not have md_def set in certain reload cases. */ + assert(_PyModule_GetDef(mod) == NULL + || cached->def == _PyModule_GetDef(mod)); assert_singlephase(cached); goto finally; } @@ -2009,8 +2012,6 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) /* If the module was added to the global cache * but def->m_base.m_copy was cleared (e.g. subinterp fini) * then we have to do a little dance here. */ - struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); if (cached != NULL) { assert(cached->def->m_base.m_copy == NULL); /* For now we clear the cache and move on. */ @@ -4361,13 +4362,12 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) return NULL; } - mod = import_find_extension(tstate, &info); + struct extensions_cache_value *cached = NULL; + mod = import_find_extension(tstate, &info, &cached); if (mod != NULL) { assert(!_PyErr_Occurred(tstate)); -#ifndef NDEBUG - struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); -#endif + assert(cached != NULL); + /* The module might not have md_def set in certain reload cases. */ assert(_PyModule_GetDef(mod) == NULL || cached->def == _PyModule_GetDef(mod)); assert_singlephase(cached); @@ -4381,8 +4381,6 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) /* If the module was added to the global cache * but def->m_base.m_copy was cleared (e.g. subinterp fini) * then we have to do a little dance here. */ - struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); if (cached != NULL) { assert(cached->def->m_base.m_copy == NULL); /* For now we clear the cache and move on. */ From 5276981d2f61519130a9a954570525b3fb805f7b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 3 May 2024 17:15:04 -0600 Subject: [PATCH 22/23] Properly sync the cached m_index and the def m_index. --- Python/import.c | 186 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 138 insertions(+), 48 deletions(-) diff --git a/Python/import.c b/Python/import.c index a0920ba2d5d66c..e7fd6cec81c0d1 100644 --- a/Python/import.c +++ b/Python/import.c @@ -446,6 +446,45 @@ _PyImport_GetNextModuleIndex(void) return _Py_atomic_add_ssize(&LAST_MODULE_INDEX, 1) + 1; } +#ifndef NDEBUG +struct extensions_cache_value; +struct extensions_cache_value * _find_cached_def(PyModuleDef *); +static Py_ssize_t _get_cached_module_index(struct extensions_cache_value *); +#endif + +static Py_ssize_t +_get_module_index_from_def(PyModuleDef *def) +{ + Py_ssize_t index = def->m_base.m_index; + assert(index > 0); +#ifndef NDEBUG + struct extensions_cache_value *cached = _find_cached_def(def); + assert(cached == NULL || index == _get_cached_module_index(cached)); +#endif + return index; +} + +static void +_set_module_index(PyModuleDef *def, Py_ssize_t index) +{ + assert(index > 0); + if (index == def->m_base.m_index) { + /* There's nothing to do. */ + } + else if (def->m_base.m_index == 0) { + /* It should have been initialized by PyModuleDef_Init(). + * We assert here to catch this in dev, but keep going otherwise. */ + assert(def->m_base.m_index != 0); + def->m_base.m_index = index; + } + else { + /* It was already set for a different module. + * We replace the old value. */ + assert(def->m_base.m_index > 0); + def->m_base.m_index = index; + } +} + static const char * _modules_by_index_check(PyInterpreterState *interp, Py_ssize_t index) { @@ -462,9 +501,8 @@ _modules_by_index_check(PyInterpreterState *interp, Py_ssize_t index) } static PyObject * -_modules_by_index_get(PyInterpreterState *interp, PyModuleDef *def) +_modules_by_index_get(PyInterpreterState *interp, Py_ssize_t index) { - Py_ssize_t index = def->m_base.m_index; if (_modules_by_index_check(interp, index) != NULL) { return NULL; } @@ -474,11 +512,9 @@ _modules_by_index_get(PyInterpreterState *interp, PyModuleDef *def) static int _modules_by_index_set(PyInterpreterState *interp, - PyModuleDef *def, PyObject *module) + Py_ssize_t index, PyObject *module) { - assert(def != NULL); - assert(def->m_slots == NULL); - assert(def->m_base.m_index > 0); + assert(index > 0); if (MODULES_BY_INDEX(interp) == NULL) { MODULES_BY_INDEX(interp) = PyList_New(0); @@ -487,7 +523,6 @@ _modules_by_index_set(PyInterpreterState *interp, } } - Py_ssize_t index = def->m_base.m_index; while (PyList_GET_SIZE(MODULES_BY_INDEX(interp)) <= index) { if (PyList_Append(MODULES_BY_INDEX(interp), Py_None) < 0) { return -1; @@ -498,9 +533,8 @@ _modules_by_index_set(PyInterpreterState *interp, } static int -_modules_by_index_clear_one(PyInterpreterState *interp, PyModuleDef *def) +_modules_by_index_clear_one(PyInterpreterState *interp, Py_ssize_t index) { - Py_ssize_t index = def->m_base.m_index; const char *err = _modules_by_index_check(interp, index); if (err != NULL) { Py_FatalError(err); @@ -517,7 +551,8 @@ PyState_FindModule(PyModuleDef* module) if (module->m_slots) { return NULL; } - return _modules_by_index_get(interp, module); + Py_ssize_t index = _get_module_index_from_def(module); + return _modules_by_index_get(interp, index); } /* _PyState_AddModule() has been completely removed from the C-API @@ -537,7 +572,9 @@ _PyState_AddModule(PyThreadState *tstate, PyObject* module, PyModuleDef* def) "PyState_AddModule called on module with slots"); return -1; } - return _modules_by_index_set(tstate->interp, def, module); + assert(def->m_slots == NULL); + Py_ssize_t index = _get_module_index_from_def(def); + return _modules_by_index_set(tstate->interp, index, module); } int @@ -557,7 +594,7 @@ PyState_AddModule(PyObject* module, PyModuleDef* def) } PyInterpreterState *interp = tstate->interp; - Py_ssize_t index = def->m_base.m_index; + Py_ssize_t index = _get_module_index_from_def(def); if (MODULES_BY_INDEX(interp) && index < PyList_GET_SIZE(MODULES_BY_INDEX(interp)) && module == PyList_GET_ITEM(MODULES_BY_INDEX(interp), index)) @@ -566,7 +603,8 @@ PyState_AddModule(PyObject* module, PyModuleDef* def) return -1; } - return _modules_by_index_set(interp, def, module); + assert(def->m_slots == NULL); + return _modules_by_index_set(interp, index, module); } int @@ -579,7 +617,8 @@ PyState_RemoveModule(PyModuleDef* def) "PyState_RemoveModule called on module with slots"); return -1; } - return _modules_by_index_clear_one(tstate->interp, def); + Py_ssize_t index = _get_module_index_from_def(def); + return _modules_by_index_clear_one(tstate->interp, index); } @@ -1005,6 +1044,13 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } +static Py_ssize_t +_get_cached_module_index(struct extensions_cache_value *cached) +{ + assert(cached->m_index > 0); + return cached->m_index; +} + static void fixup_cached_def(struct extensions_cache_value *value) { @@ -1026,6 +1072,9 @@ fixup_cached_def(struct extensions_cache_value *value) def->m_base.m_init = value->m_init; + assert(value->m_index > 0); + _set_module_index(def, value->m_index); + /* Different modules can share the same def, so we can't just * expect m_copy to be NULL. */ assert(def->m_base.m_copy == NULL @@ -1200,6 +1249,41 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } +#ifndef NDEBUG +struct hashtable_next_match_def_data { + PyModuleDef *def; + struct extensions_cache_value *matched; +}; + +static int +hashtable_next_match_def(_Py_hashtable_t *ht, + const void *key, const void *value, void *user_data) +{ + if (value == NULL) { + /* It was previously deleted. */ + return 0; + } + struct hashtable_next_match_def_data *data + = (struct hashtable_next_match_def_data *)user_data; + struct extensions_cache_value *cur + = (struct extensions_cache_value *)value; + if (cur->def == data->def) { + data->matched = cur; + return 1; + } + return 0; +} + +struct extensions_cache_value * +_find_cached_def(PyModuleDef *def) +{ + struct hashtable_next_match_def_data data = {0}; + (void)_Py_hashtable_foreach( + EXTENSIONS.hashtable, hashtable_next_match_def, &data); + return data.matched; +} +#endif + #define HTSEP ':' static int @@ -1263,13 +1347,17 @@ _extensions_cache_get(PyObject *path, PyObject *name) } /* This can only fail with "out of memory". */ -static int +static struct extensions_cache_value * _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def, PyModInitFunction m_init, Py_ssize_t m_index, PyObject *m_dict, _Py_ext_module_origin origin) { - int res = -1; + struct extensions_cache_value *value = NULL; + void *key = NULL; + struct extensions_cache_value *newvalue = NULL; + PyModuleDef_Base olddefbase = def->m_base; + assert(def != NULL); assert(m_init == NULL || m_dict == NULL); /* We expect the same symbol to be used and the shared object file @@ -1280,11 +1368,6 @@ _extensions_cache_set(PyObject *path, PyObject *name, assert((origin == _Py_ext_module_origin_DYNAMIC) == (name != path)); assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); - void *key = NULL; - struct extensions_cache_value *value = NULL; - struct extensions_cache_value *newvalue = NULL; - PyModuleDef_Base olddefbase = def->m_base; - extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { @@ -1350,10 +1433,10 @@ _extensions_cache_set(PyObject *path, PyObject *name, Py_UNREACHABLE(); } - res = 0; + value = newvalue; finally: - if (res < 0) { + if (value == NULL) { restore_old_cached_def(def, &olddefbase); if (newvalue != NULL) { del_extensions_cache_value(newvalue); @@ -1368,7 +1451,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, hashtable_destroy_str(key); } - return res; + return value; } static void @@ -1544,12 +1627,13 @@ struct singlephase_global_update { _Py_ext_module_origin origin; }; -static int +static struct extensions_cache_value * update_global_state_for_extension(PyThreadState *tstate, PyObject *path, PyObject *name, PyModuleDef *def, struct singlephase_global_update *singlephase) { + struct extensions_cache_value *cached = NULL; PyModInitFunction m_init = NULL; PyObject *m_dict = NULL; @@ -1594,34 +1678,34 @@ update_global_state_for_extension(PyThreadState *tstate, // XXX Why special-case the main interpreter? if (_Py_IsMainInterpreter(tstate->interp) || def->m_size == -1) { #ifndef NDEBUG - struct extensions_cache_value *cached - = _extensions_cache_get(path, name); + cached = _extensions_cache_get(path, name); assert(cached == NULL || cached->def == def); #endif - if (_extensions_cache_set( + cached = _extensions_cache_set( path, name, def, m_init, singlephase->m_index, m_dict, - singlephase->origin) < 0) - { + singlephase->origin); + if (cached == NULL) { // XXX Ignore this error? Doing so would effectively // mark the module as not loadable. - return -1; + return NULL; } } - return 0; + return cached; } /* For multi-phase init modules, the module is finished * by PyModule_FromDefAndSpec(). */ static int -finish_singlephase_extension(PyThreadState *tstate, - PyObject *mod, PyModuleDef *def, +finish_singlephase_extension(PyThreadState *tstate, PyObject *mod, + struct extensions_cache_value *cached, PyObject *name, PyObject *modules) { assert(mod != NULL && PyModule_Check(mod)); - assert(def == _PyModule_GetDef(mod)); + assert(cached->def == _PyModule_GetDef(mod)); - if (_modules_by_index_set(tstate->interp, def, mod) < 0) { + Py_ssize_t index = _get_cached_module_index(cached); + if (_modules_by_index_set(tstate->interp, index, mod) < 0) { return -1; } @@ -1729,7 +1813,8 @@ reload_singlephase_extension(PyThreadState *tstate, } } - if (_modules_by_index_set(tstate->interp, def, mod) < 0) { + Py_ssize_t index = _get_cached_module_index(cached); + if (_modules_by_index_set(tstate->interp, index, mod) < 0) { PyMapping_DelItem(modules, info->name); Py_DECREF(mod); return NULL; @@ -1786,6 +1871,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, PyObject *mod = NULL; PyModuleDef *def = NULL; + struct extensions_cache_value *cached = NULL; const char *name_buf = PyBytes_AS_STRING(info->name_encoded); struct _Py_ext_module_loader_result res; @@ -1828,6 +1914,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, } /* Update global import state. */ + assert(def->m_base.m_index != 0); struct singlephase_global_update singlephase = { // XXX Modules that share a def should each get their own index, // whereas currently they share (which means the per-interpreter @@ -1849,16 +1936,16 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, assert(def->m_base.m_copy == NULL); singlephase.m_init = p0; } - if (update_global_state_for_extension( - tstate, info->path, info->name, def, &singlephase) < 0) - { + cached = update_global_state_for_extension( + tstate, info->path, info->name, def, &singlephase); + if (cached == NULL) { goto error; } /* Update per-interpreter import state. */ PyObject *modules = get_modules_dict(tstate, true); if (finish_singlephase_extension( - tstate, mod, def, info->name, modules) < 0) + tstate, mod, cached, info->name, modules) < 0) { goto error; } @@ -1893,8 +1980,9 @@ clear_singlephase_extension(PyInterpreterState *interp, // We leave m_index alone since there's no reason to reset it. /* Clear the PyState_*Module() cache entry. */ - if (_modules_by_index_check(interp, def->m_base.m_index) == NULL) { - if (_modules_by_index_clear_one(interp, def) < 0) { + Py_ssize_t index = _get_cached_module_index(cached); + if (_modules_by_index_check(interp, index) == NULL) { + if (_modules_by_index_clear_one(interp, index) < 0) { return -1; } } @@ -1943,21 +2031,23 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, * so we have to do the extra check to make sure the module * isn't already in the global cache before calling * update_global_state_for_extension(). */ - if (_extensions_cache_get(nameobj, nameobj) == NULL) { + struct extensions_cache_value *cached + = _extensions_cache_get(nameobj, nameobj); + if (cached == NULL) { struct singlephase_global_update singlephase = { .m_index=def->m_base.m_index, /* We don't want def->m_base.m_copy populated. */ .m_dict=NULL, .origin=_Py_ext_module_origin_CORE, }; - if (update_global_state_for_extension( - tstate, nameobj, nameobj, def, &singlephase) < 0) - { + cached = update_global_state_for_extension( + tstate, nameobj, nameobj, def, &singlephase); + if (cached == NULL) { goto finally; } } - if (finish_singlephase_extension(tstate, mod, def, nameobj, modules) < 0) { + if (finish_singlephase_extension(tstate, mod, cached, nameobj, modules) < 0) { goto finally; } From 7d3dbc18e643d060a3b89d939767b73a5ff4a436 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Sat, 4 May 2024 15:00:35 -0600 Subject: [PATCH 23/23] Fix make smelly. --- Python/import.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/import.c b/Python/import.c index b484e2d28d2625..fa0e548c9bf4c4 100644 --- a/Python/import.c +++ b/Python/import.c @@ -448,7 +448,7 @@ _PyImport_GetNextModuleIndex(void) #ifndef NDEBUG struct extensions_cache_value; -struct extensions_cache_value * _find_cached_def(PyModuleDef *); +static struct extensions_cache_value * _find_cached_def(PyModuleDef *); static Py_ssize_t _get_cached_module_index(struct extensions_cache_value *); #endif @@ -1274,7 +1274,7 @@ hashtable_next_match_def(_Py_hashtable_t *ht, return 0; } -struct extensions_cache_value * +static struct extensions_cache_value * _find_cached_def(PyModuleDef *def) { struct hashtable_next_match_def_data data = {0};