diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 68b536f75ca5e0..f81829733d94c5 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -263,6 +263,9 @@ cache_backoff(_PyAdaptiveEntry *entry) { entry->counter = ADAPTIVE_CACHE_BACKOFF; } +/* _interpreter_frame is defined in pycore_frame.h */ +typedef struct _interpreter_frame InterpreterFrame; + /* Specialization functions */ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); @@ -272,7 +275,8 @@ int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *na int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr); int _Py_Specialize_CallNoKw(PyObject *callable, _Py_CODEUNIT *instr, int nargs, - PyObject *kwnames, SpecializedCacheEntry *cache, PyObject *builtins); + PyObject *kwnames, SpecializedCacheEntry *cache, PyObject *builtins, + PyObject **stack_pointer, InterpreterFrame *frame, PyObject *names); void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index c480a3a57b436c..9b10398345768e 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -43,6 +43,11 @@ extern PyStatus _PyTypes_InitSlotDefs(void); extern void _PyStaticType_Dealloc(PyTypeObject *type); +/* _interpreter_frame is defined in pycore_frame.h */ +typedef struct _interpreter_frame InterpreterFrame; + +PyObject *_PySuper_Lookup(PyTypeObject *, PyObject *, PyObject *, int *); +int _PySuper_GetTypeArgs(InterpreterFrame *, PyCodeObject *, PyTypeObject **, PyObject **); 
#ifdef __cplusplus } diff --git a/Include/opcode.h b/Include/opcode.h index bce7010ab186b4..646c4baae010ae 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -173,6 +173,8 @@ extern "C" { #define LOAD_FAST__LOAD_CONST 143 #define LOAD_CONST__LOAD_FAST 150 #define STORE_FAST__STORE_FAST 153 +#define CALL_NO_KW_SUPER_0__LOAD_METHOD_CACHED 154 +#define CALL_NO_KW_SUPER_2__LOAD_METHOD_CACHED 158 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/opcode.py b/Lib/opcode.py index c672aa59f8ec3e..bf2a050d492ce4 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -288,6 +288,9 @@ def jabs_op(name, op): "LOAD_FAST__LOAD_CONST", "LOAD_CONST__LOAD_FAST", "STORE_FAST__STORE_FAST", + # Specialized super instructions. + "CALL_NO_KW_SUPER_0__LOAD_METHOD_CACHED", + "CALL_NO_KW_SUPER_2__LOAD_METHOD_CACHED", ] _specialization_stats = [ "success", diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-29-00-44-09.bpo-46564.MwLSHf.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-29-00-44-09.bpo-46564.MwLSHf.rst new file mode 100644 index 00000000000000..3f2722c42cbfa6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-29-00-44-09.bpo-46564.MwLSHf.rst @@ -0,0 +1,5 @@ +Method calls on :class:`super` are sped up. The 2-argument form, +``super(type, obj).meth()``, is now nearly as fast as an equivalent +``self.meth()`` call. The 0-argument form, while still slower, is +faster than in previous versions of CPython. Patch by Ken Jin, with +additional contributions by Vladimir Matveev. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index f7e0775e2225b7..097eef53c7c756 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -8846,16 +8846,18 @@ super_repr(PyObject *self) ", NULL>", su->type ? 
su->type->tp_name : "NULL"); } +/* Forward */ +static PyTypeObject *supercheck(PyTypeObject *type, PyObject *obj); static PyObject * -super_getattro(PyObject *self, PyObject *name) +do_super_lookup(superobject *su, PyTypeObject *su_type, PyObject *su_obj, + PyTypeObject *su_obj_type, PyObject *name, int *meth_found) { - superobject *su = (superobject *)self; PyTypeObject *starttype; PyObject *mro; Py_ssize_t i, n; - starttype = su->obj_type; + starttype = su_obj_type; if (starttype == NULL) goto skip; @@ -8875,7 +8877,7 @@ super_getattro(PyObject *self, PyObject *name) /* No need to check the last one: it's gonna be skipped anyway. */ for (i = 0; i+1 < n; i++) { - if ((PyObject *)(su->type) == PyTuple_GET_ITEM(mro, i)) + if ((PyObject *)(su_type) == PyTuple_GET_ITEM(mro, i)) break; } i++; /* skip su->type (if any) */ @@ -8893,17 +8895,25 @@ super_getattro(PyObject *self, PyObject *name) PyObject *res = PyDict_GetItemWithError(dict, name); if (res != NULL) { Py_INCREF(res); - - descrgetfunc f = Py_TYPE(res)->tp_descr_get; - if (f != NULL) { - PyObject *res2; - res2 = f(res, - /* Only pass 'obj' param if this is instance-mode super - (See SF ID #743627) */ - (su->obj == (PyObject *)starttype) ? NULL : su->obj, - (PyObject *)starttype); - Py_DECREF(res); - res = res2; + if (meth_found && + _PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)) { + *meth_found = 1; + } + else { + if (meth_found) { + *meth_found = 0; + } + descrgetfunc f = Py_TYPE(res)->tp_descr_get; + if (f != NULL) { + PyObject *res2; + res2 = f(res, + /* Only pass 'obj' param if this is instance-mode super + (See SF ID #743627) */ + (su_obj == (PyObject *)starttype) ? 
NULL : su_obj, + (PyObject *)starttype); + Py_DECREF(res); + res = res2; + } } Py_DECREF(mro); @@ -8919,7 +8929,31 @@ super_getattro(PyObject *self, PyObject *name) Py_DECREF(mro); skip: - return PyObject_GenericGetAttr(self, name); + /* only happens when using manual _PySuper_Lookup, never happens in super_getattro */ + if (su == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + return PyObject_GenericGetAttr((PyObject *)su, name); +} + +static PyObject * +super_getattro(PyObject *self, PyObject *name) +{ + superobject *su = (superobject *)self; + return do_super_lookup(su, su->type, su->obj, su->obj_type, name, NULL); +} + +PyObject * +_PySuper_Lookup(PyTypeObject *su_type, PyObject *su_obj, PyObject *name, int *meth_found) +{ + PyTypeObject *starttype = supercheck(su_type, su_obj); + if (starttype == NULL) { + return NULL; + } + PyObject *res = do_super_lookup(NULL, su_type, su_obj, starttype, name, meth_found); + Py_DECREF(starttype); + return res; } static PyTypeObject * @@ -9011,8 +9045,8 @@ super_descr_get(PyObject *self, PyObject *obj, PyObject *type) } } -static int -super_init_without_args(InterpreterFrame *cframe, PyCodeObject *co, +int +_PySuper_GetTypeArgs(InterpreterFrame *cframe, PyCodeObject *co, PyTypeObject **type_p, PyObject **obj_p) { if (co->co_argcount == 0) { @@ -9102,7 +9136,8 @@ super_init(PyObject *self, PyObject *args, PyObject *kwds) "super(): no current frame"); return -1; } - int res = super_init_without_args(cframe, cframe->f_code, &type, &obj); + + int res = _PySuper_GetTypeArgs(cframe, cframe->f_code, &type, &obj); if (res < 0) { return -1; diff --git a/Python/ceval.c b/Python/ceval.c index b69d5aa9d32069..caf2e4d21f1b89 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1361,6 +1361,7 @@ eval_frame_handle_pending(PyThreadState *tstate) /* The integer overflow is checked by an assertion below. 
*/ #define INSTR_OFFSET() ((int)(next_instr - first_instr)) +#define NEXT_INSTR_OFFSET() ((int)(next_instr+1 - first_instr)) #define NEXTOPARG() do { \ _Py_CODEUNIT word = *next_instr; \ opcode = _Py_OPCODE(word); \ @@ -1486,6 +1487,9 @@ eval_frame_handle_pending(PyThreadState *tstate) #define GET_CACHE() \ _GetSpecializedCacheEntryForInstruction(first_instr, INSTR_OFFSET(), oparg) +# define GET_NEXT_INSTR_CACHE() \ + _GetSpecializedCacheEntryForInstruction(first_instr, NEXT_INSTR_OFFSET(), \ + _Py_OPARG(*next_instr)) #define DEOPT_IF(cond, instname) if (cond) { goto instname ## _miss; } @@ -4633,7 +4637,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr int nargs = call_shape.total_args; int err = _Py_Specialize_CallNoKw( call_shape.callable, next_instr, nargs, - call_shape.kwnames, cache, BUILTINS()); + call_shape.kwnames, cache, BUILTINS(), + stack_pointer, frame, names); if (err < 0) { goto error; } @@ -5070,6 +5075,86 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } + TARGET(CALL_NO_KW_SUPER_0__LOAD_METHOD_CACHED) { + /* super().meth */ + assert(_Py_OPCODE(next_instr[0]) == LOAD_METHOD_ADAPTIVE); + assert(_Py_OPCODE(next_instr[-2]) != PRECALL_METHOD); + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyObjectCache *cache1 = &caches[-1].obj; + _PyAdaptiveEntry *lm_adaptive = &caches[-2].adaptive; + assert(lm_adaptive == &GET_NEXT_INSTR_CACHE()[0].adaptive); + assert(call_shape.total_args == 0); + + /* CALL_NO_KW_SUPER */ + PyObject *su_obj; + PyTypeObject *su_type; + PyObject *meth; + PyObject *super_callable = TOP(); + + DEOPT_IF(_PyType_CAST(super_callable) != &PySuper_Type, CALL); + /* super() - zero argument form */ + if (_PySuper_GetTypeArgs(frame, frame->f_code, &su_type, &su_obj) < 0) { + PyErr_Clear(); + DEOPT_IF(1, CALL); + } + assert(su_obj != NULL); + DEOPT_IF(lm_adaptive->version != Py_TYPE(su_obj)->tp_version_tag, 
CALL); + DEOPT_IF(cache0->version != su_type->tp_version_tag, CALL); + STAT_INC(CALL, hit); + + /* LOAD_METHOD_CACHED */ + meth = cache1->obj; + assert(meth != NULL && _PyType_HasFeature(Py_TYPE(meth), Py_TPFLAGS_METHOD_DESCRIPTOR)); + Py_INCREF(meth); + SET_TOP(meth); + Py_INCREF(su_obj); + PUSH(su_obj); + + Py_DECREF(super_callable); + next_instr++; + DISPATCH(); + } + + TARGET(CALL_NO_KW_SUPER_2__LOAD_METHOD_CACHED) { + /* super(type, obj).meth */ + assert(_Py_OPCODE(next_instr[0]) == LOAD_METHOD_ADAPTIVE); + assert(_Py_OPCODE(next_instr[-2]) != PRECALL_METHOD); + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyObjectCache *cache1 = &caches[-1].obj; + _PyAdaptiveEntry *lm_adaptive = &caches[-2].adaptive; + assert(lm_adaptive == &GET_NEXT_INSTR_CACHE()[0].adaptive); + assert(call_shape.total_args == 2); + assert(call_shape.kwnames == NULL); + + /* CALL_NO_KW_SUPER */ + /* super(type, obj) - two argument form */ + PyObject *su_obj = TOP(); + PyTypeObject *su_type = _PyType_CAST(SECOND()); + PyObject *super_callable = THIRD(); + PyObject *meth; + + DEOPT_IF(_PyType_CAST(super_callable) != &PySuper_Type, CALL); + assert(su_obj != NULL); + DEOPT_IF(lm_adaptive->version != Py_TYPE(su_obj)->tp_version_tag, CALL); + DEOPT_IF(cache0->version != su_type->tp_version_tag, CALL); + STAT_INC(CALL, hit); + + (void)(POP()); + /* LOAD_METHOD_CACHED */ + meth = cache1->obj; + assert(meth != NULL && _PyType_HasFeature(Py_TYPE(meth), Py_TPFLAGS_METHOD_DESCRIPTOR)); + Py_INCREF(meth); + SET_SECOND(meth); + SET_TOP(su_obj); + + Py_DECREF(super_callable); + Py_DECREF(su_type); + next_instr++; + DISPATCH(); + } + TARGET(CALL_FUNCTION_EX) { PREDICTED(CALL_FUNCTION_EX); PyObject *func, *callargs, *kwargs = NULL, *result; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 1a809ed409d581..a8a7649280e64c 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -153,11 +153,11 @@ static void 
*opcode_targets[256] = { &&TARGET_RESUME, &&TARGET_MATCH_CLASS, &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, + &&TARGET_CALL_NO_KW_SUPER_0__LOAD_METHOD_CACHED, &&TARGET_FORMAT_VALUE, &&TARGET_BUILD_CONST_KEY_MAP, &&TARGET_BUILD_STRING, - &&_unknown_opcode, + &&TARGET_CALL_NO_KW_SUPER_2__LOAD_METHOD_CACHED, &&_unknown_opcode, &&TARGET_LOAD_METHOD, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index aec94d9e60be41..1986a19df6a0d5 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -4,6 +4,7 @@ #include "pycore_long.h" #include "pycore_moduleobject.h" #include "pycore_object.h" +#include "pycore_frame.h" #include "opcode.h" #include "structmember.h" // struct PyMemberDef, T_OFFSET_EX @@ -961,6 +962,12 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, _PyObjectCache *cache2 = &cache[-2].obj; PyTypeObject *owner_cls = Py_TYPE(owner); + _Py_CODEUNIT prev_instr = _Py_OPCODE(instr[-1]); + if (prev_instr == CALL_NO_KW_SUPER_0__LOAD_METHOD_CACHED || + prev_instr == CALL_NO_KW_SUPER_2__LOAD_METHOD_CACHED) { + /* Our own cache entries are already being used by superinstructions. */ + goto fail; + } if (PyModule_CheckExact(owner)) { int err = specialize_module_load_attr(owner, instr, name, cache0, cache1, LOAD_METHOD, LOAD_METHOD_MODULE); @@ -1351,7 +1358,8 @@ _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *ins static int specialize_class_call( PyObject *callable, _Py_CODEUNIT *instr, - int nargs, PyObject *kwnames, SpecializedCacheEntry *cache) + int nargs, PyObject *kwnames, SpecializedCacheEntry *cache, + PyObject **stack_pointer, InterpreterFrame *frame, PyObject *names) { PyTypeObject *tp = _PyType_CAST(callable); if (tp->tp_new == PyBaseObject_Type.tp_new) { @@ -1373,6 +1381,58 @@ specialize_class_call( return 0; } } + /* Adaptive super instruction of CALL and LOAD_METHOD_ADAPTIVE. 
*/ + if (tp == &PySuper_Type && + kwnames == NULL && + /* Important: this also protects us from accidentally overwriting + the next specialized instruction's cache. We can only use the + subsequent LOAD_METHOD cache if it hasn't specialized yet. + */ + _Py_OPCODE(instr[1]) == LOAD_METHOD_ADAPTIVE && + _Py_OPCODE(instr[-1]) == PRECALL_FUNCTION && + (nargs == 0 || nargs == 2)) { + /* Use load_method cache entries too. */ + _PyAdaptiveEntry *lm_adaptive = &cache[-cache_requirements[CALL]].adaptive; + _PyObjectCache *cache1 = &cache[-1].obj; + PyObject *su_obj; + PyTypeObject *su_type; + PyObject *meth; + int meth_found; + PyObject *name = PyTuple_GET_ITEM(names, lm_adaptive->original_oparg); + + /* Note (KJ): the following operations must not affect tp_version_tag. */ + /* super() zero arg form. */ + if (nargs == 0) { + if (_PySuper_GetTypeArgs(frame, frame->f_code, &su_type, &su_obj) < 0) { + PyErr_Clear(); + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_NOT_DESCRIPTOR); + return -1; + } + } + /* super(su_type, su_obj) two arg form. */ + else if (nargs == 2) { + su_type = _PyType_CAST(stack_pointer[-2]); + su_obj = stack_pointer[-1]; + } + meth = _PySuper_Lookup(su_type, su_obj, name, &meth_found); + if (meth == NULL) { + assert(PyErr_Occurred()); + PyErr_Clear(); + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OTHER); + return -1; + } + Py_DECREF(meth); + if (!meth_found) { + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_NOT_DESCRIPTOR); + return -1; + } + cache->adaptive.version = su_type->tp_version_tag; + cache1->obj = meth; /* borrowed */ + lm_adaptive->version = Py_TYPE(su_obj)->tp_version_tag; + *instr = _Py_MAKECODEUNIT(nargs == 0 ? 
CALL_NO_KW_SUPER_0__LOAD_METHOD_CACHED + : CALL_NO_KW_SUPER_2__LOAD_METHOD_CACHED, _Py_OPARG(*instr)); + return 0; + } if (tp->tp_vectorcall != NULL) { *instr = _Py_MAKECODEUNIT(CALL_BUILTIN_CLASS, _Py_OPARG(*instr)); return 0; @@ -1616,8 +1676,9 @@ call_fail_kind(PyObject *callable) int _Py_Specialize_CallNoKw( PyObject *callable, _Py_CODEUNIT *instr, - int nargs, PyObject *kwnames, - SpecializedCacheEntry *cache, PyObject *builtins) + int nargs, PyObject *kwnames, SpecializedCacheEntry *cache, + PyObject *builtins, PyObject **stack_pointer, InterpreterFrame *frame, + PyObject *names) { _PyAdaptiveEntry *cache0 = &cache->adaptive; int fail; @@ -1628,7 +1689,8 @@ _Py_Specialize_CallNoKw( fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, kwnames, cache); } else if (PyType_Check(callable)) { - fail = specialize_class_call(callable, instr, nargs, kwnames, cache); + fail = specialize_class_call(callable, instr, nargs, kwnames, cache, stack_pointer, + frame, names); } else if (Py_IS_TYPE(callable, &PyMethodDescr_Type)) { fail = specialize_method_descriptor(